Text Generation
Transformers
Safetensors
llama
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use PatrickG1014/alf_plan_sft with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use PatrickG1014/alf_plan_sft with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="PatrickG1014/alf_plan_sft")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("PatrickG1014/alf_plan_sft")
model = AutoModelForCausalLM.from_pretrained("PatrickG1014/alf_plan_sft")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use PatrickG1014/alf_plan_sft with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "PatrickG1014/alf_plan_sft"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "PatrickG1014/alf_plan_sft",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/PatrickG1014/alf_plan_sft
- SGLang
How to use PatrickG1014/alf_plan_sft with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "PatrickG1014/alf_plan_sft" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "PatrickG1014/alf_plan_sft",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "PatrickG1014/alf_plan_sft" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "PatrickG1014/alf_plan_sft",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use PatrickG1014/alf_plan_sft with Docker Model Runner:
docker model run hf.co/PatrickG1014/alf_plan_sft
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 750, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 35.48504262720802, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 1.0899, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 16.48187608589097, | |
| "learning_rate": 2.5333333333333338e-06, | |
| "loss": 0.5249, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 10.468238992764016, | |
| "learning_rate": 3.866666666666667e-06, | |
| "loss": 0.3168, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 9.024637955946732, | |
| "learning_rate": 5.2e-06, | |
| "loss": 0.3364, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 6.073313045678999, | |
| "learning_rate": 6.533333333333334e-06, | |
| "loss": 0.2806, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 5.687855097745657, | |
| "learning_rate": 7.866666666666667e-06, | |
| "loss": 0.2787, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 7.1809472777573635, | |
| "learning_rate": 9.200000000000002e-06, | |
| "loss": 0.3306, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 7.194716178236872, | |
| "learning_rate": 9.99913355769784e-06, | |
| "loss": 0.3409, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 4.198748238410053, | |
| "learning_rate": 9.989389530100242e-06, | |
| "loss": 0.3485, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 7.892891880606895, | |
| "learning_rate": 9.968839595802982e-06, | |
| "loss": 0.3538, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 6.998920140370734, | |
| "learning_rate": 9.937528261387753e-06, | |
| "loss": 0.3176, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 7.374807966049285, | |
| "learning_rate": 9.89552334023258e-06, | |
| "loss": 0.3123, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 6.20558262495305, | |
| "learning_rate": 9.842915805643156e-06, | |
| "loss": 0.3996, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 9.62426466622868, | |
| "learning_rate": 9.779819593824909e-06, | |
| "loss": 0.297, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 8.635680958421968, | |
| "learning_rate": 9.70637135712256e-06, | |
| "loss": 0.31, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 3.232390024132561, | |
| "learning_rate": 9.622730168061568e-06, | |
| "loss": 0.2816, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 3.695162741603393, | |
| "learning_rate": 9.529077174832466e-06, | |
| "loss": 0.2902, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 2.8436007785521564, | |
| "learning_rate": 9.425615208964217e-06, | |
| "loss": 0.286, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 3.087524553818655, | |
| "learning_rate": 9.312568346036288e-06, | |
| "loss": 0.3148, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 4.601577314459775, | |
| "learning_rate": 9.190181420380838e-06, | |
| "loss": 0.306, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 2.909610972090332, | |
| "learning_rate": 9.058719494826076e-06, | |
| "loss": 0.3021, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 4.340156720044649, | |
| "learning_rate": 8.9184672866292e-06, | |
| "loss": 0.3793, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 4.517487511541375, | |
| "learning_rate": 8.769728550842217e-06, | |
| "loss": 0.3205, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 5.479883695257259, | |
| "learning_rate": 8.61282542244614e-06, | |
| "loss": 0.2903, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.455068019285842, | |
| "learning_rate": 8.44809771867835e-06, | |
| "loss": 0.2974, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 3.6092951322383837, | |
| "learning_rate": 8.275902203064125e-06, | |
| "loss": 0.1821, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 4.192933227036564, | |
| "learning_rate": 8.096611812746302e-06, | |
| "loss": 0.2386, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 3.3167996311633456, | |
| "learning_rate": 7.910614850786448e-06, | |
| "loss": 0.2272, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 4.068655472873125, | |
| "learning_rate": 7.718314145186918e-06, | |
| "loss": 0.167, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 4.007296325372359, | |
| "learning_rate": 7.520126176455084e-06, | |
| "loss": 0.2593, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 2.817920804738045, | |
| "learning_rate": 7.31648017559931e-06, | |
| "loss": 0.1997, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.7743725972139743, | |
| "learning_rate": 7.107817194510157e-06, | |
| "loss": 0.2198, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 2.8263528430159073, | |
| "learning_rate": 6.8945891507402075e-06, | |
| "loss": 0.2121, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 2.76737686079553, | |
| "learning_rate": 6.677257848751276e-06, | |
| "loss": 0.2219, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 2.9387440745751534, | |
| "learning_rate": 6.456293979748778e-06, | |
| "loss": 0.1814, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 2.9525223454355887, | |
| "learning_rate": 6.23217610226939e-06, | |
| "loss": 0.2227, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 3.8813673476773336, | |
| "learning_rate": 6.005389605729824e-06, | |
| "loss": 0.221, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 3.0211148351187442, | |
| "learning_rate": 5.776425659181438e-06, | |
| "loss": 0.2186, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 3.353673660930103, | |
| "learning_rate": 5.54578014754744e-06, | |
| "loss": 0.2259, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.759096393662257, | |
| "learning_rate": 5.3139525976465675e-06, | |
| "loss": 0.1703, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 2.5075521908308627, | |
| "learning_rate": 5.081445096329229e-06, | |
| "loss": 0.2054, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 2.667218907894434, | |
| "learning_rate": 4.8487612030691975e-06, | |
| "loss": 0.1908, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 3.7467375716198794, | |
| "learning_rate": 4.6164048593659076e-06, | |
| "loss": 0.1961, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 2.0673735404525795, | |
| "learning_rate": 4.384879297319398e-06, | |
| "loss": 0.1939, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 2.977965453692763, | |
| "learning_rate": 4.154685949741631e-06, | |
| "loss": 0.2254, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 4.207758072866126, | |
| "learning_rate": 3.926323364164684e-06, | |
| "loss": 0.2024, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 3.8669669126524124, | |
| "learning_rate": 3.700286123097814e-06, | |
| "loss": 0.1879, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 3.5441123121527967, | |
| "learning_rate": 3.4770637728718608e-06, | |
| "loss": 0.1948, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 3.2798797246303466, | |
| "learning_rate": 3.2571397633909252e-06, | |
| "loss": 0.2235, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.2871674589145363, | |
| "learning_rate": 3.040990401087508e-06, | |
| "loss": 0.2277, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 3.0673565792695654, | |
| "learning_rate": 2.82908381734886e-06, | |
| "loss": 0.1267, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 2.6046689356940163, | |
| "learning_rate": 2.6218789546486235e-06, | |
| "loss": 0.1412, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 2.5729189110537, | |
| "learning_rate": 2.4198245725796427e-06, | |
| "loss": 0.1202, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 3.502942746638977, | |
| "learning_rate": 2.2233582759406065e-06, | |
| "loss": 0.1407, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 2.9354585601623295, | |
| "learning_rate": 2.0329055669814936e-06, | |
| "loss": 0.1324, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 2.388511910600407, | |
| "learning_rate": 1.8488789238604676e-06, | |
| "loss": 0.1358, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 1.875782926642573, | |
| "learning_rate": 1.671676907308018e-06, | |
| "loss": 0.0976, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 2.464354656406682, | |
| "learning_rate": 1.5016832974331725e-06, | |
| "loss": 0.1121, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 2.2360057587719795, | |
| "learning_rate": 1.339266262541249e-06, | |
| "loss": 0.1228, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 2.2693254276644237, | |
| "learning_rate": 1.1847775617632746e-06, | |
| "loss": 0.1613, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 2.415277875645134, | |
| "learning_rate": 1.0385517832240472e-06, | |
| "loss": 0.1337, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 2.6495067745174463, | |
| "learning_rate": 9.009056193987569e-07, | |
| "loss": 0.0975, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 2.1271611125817196, | |
| "learning_rate": 7.72137181227608e-07, | |
| "loss": 0.1073, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 2.383144498016058, | |
| "learning_rate": 6.52525352473905e-07, | |
| "loss": 0.1216, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 2.31305492224638, | |
| "learning_rate": 5.423291857239177e-07, | |
| "loss": 0.1255, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 2.819752170788764, | |
| "learning_rate": 4.417873413366702e-07, | |
| "loss": 0.1121, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 1.8190497192872692, | |
| "learning_rate": 3.511175705587433e-07, | |
| "loss": 0.1003, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 2.4397321549581075, | |
| "learning_rate": 2.705162439235648e-07, | |
| "loss": 0.1226, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 2.1508054212933962, | |
| "learning_rate": 2.0015792595656225e-07, | |
| "loss": 0.1546, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 3.6112413918959363, | |
| "learning_rate": 1.4019499710726913e-07, | |
| "loss": 0.1267, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 3.5252224980555376, | |
| "learning_rate": 9.075732372720414e-08, | |
| "loss": 0.121, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 2.4820036298044506, | |
| "learning_rate": 5.19519768082738e-08, | |
| "loss": 0.137, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.9548504439015364, | |
| "learning_rate": 2.386300009084408e-08, | |
| "loss": 0.1089, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 2.013463783227666, | |
| "learning_rate": 6.551228043715218e-09, | |
| "loss": 0.1258, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.6783259480742636, | |
| "learning_rate": 5.4154110206150465e-11, | |
| "loss": 0.1466, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 750, | |
| "total_flos": 1930678763520.0, | |
| "train_loss": 0.23004729866981508, | |
| "train_runtime": 1545.5894, | |
| "train_samples_per_second": 1.941, | |
| "train_steps_per_second": 0.485 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1930678763520.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |