Text Generation
Transformers
Safetensors
English
Chinese
qwen3
fine-tuned
structured-memory
conversational-ai
conversational
text-generation-inference
Instructions to use wtqiu/DimMem-4B-Locomo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use wtqiu/DimMem-4B-Locomo with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="wtqiu/DimMem-4B-Locomo")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("wtqiu/DimMem-4B-Locomo")
model = AutoModelForCausalLM.from_pretrained("wtqiu/DimMem-4B-Locomo")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use wtqiu/DimMem-4B-Locomo with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "wtqiu/DimMem-4B-Locomo"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "wtqiu/DimMem-4B-Locomo",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/wtqiu/DimMem-4B-Locomo
- SGLang
How to use wtqiu/DimMem-4B-Locomo with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "wtqiu/DimMem-4B-Locomo" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "wtqiu/DimMem-4B-Locomo",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "wtqiu/DimMem-4B-Locomo" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "wtqiu/DimMem-4B-Locomo",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use wtqiu/DimMem-4B-Locomo with Docker Model Runner:
docker model run hf.co/wtqiu/DimMem-4B-Locomo
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 200.0, | |
| "global_step": 354, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005649717514124294, | |
| "grad_norm": 1.8059313297271729, | |
| "learning_rate": 4.999901553476555e-05, | |
| "loss": 0.4717830717563629, | |
| "step": 1, | |
| "token_acc": 0.924627166465135 | |
| }, | |
| { | |
| "epoch": 0.05649717514124294, | |
| "grad_norm": 0.1542048305273056, | |
| "learning_rate": 4.9901617425775067e-05, | |
| "loss": 0.33008625772264266, | |
| "step": 10, | |
| "token_acc": 0.9219071687140808 | |
| }, | |
| { | |
| "epoch": 0.11299435028248588, | |
| "grad_norm": 0.11267846822738647, | |
| "learning_rate": 4.9607244033573156e-05, | |
| "loss": 0.25351030826568605, | |
| "step": 20, | |
| "token_acc": 0.9264000170247177 | |
| }, | |
| { | |
| "epoch": 0.1694915254237288, | |
| "grad_norm": 0.09091860800981522, | |
| "learning_rate": 4.91191967203629e-05, | |
| "loss": 0.22513890266418457, | |
| "step": 30, | |
| "token_acc": 0.9317765577689588 | |
| }, | |
| { | |
| "epoch": 0.22598870056497175, | |
| "grad_norm": 0.08639991283416748, | |
| "learning_rate": 4.84413167142257e-05, | |
| "loss": 0.20885028839111328, | |
| "step": 40, | |
| "token_acc": 0.9359174715592828 | |
| }, | |
| { | |
| "epoch": 0.2824858757062147, | |
| "grad_norm": 0.09375399351119995, | |
| "learning_rate": 4.7578939341563095e-05, | |
| "loss": 0.19784480333328247, | |
| "step": 50, | |
| "token_acc": 0.9384661788621316 | |
| }, | |
| { | |
| "epoch": 0.3389830508474576, | |
| "grad_norm": 0.08870115131139755, | |
| "learning_rate": 4.653885203484515e-05, | |
| "loss": 0.18448562622070314, | |
| "step": 60, | |
| "token_acc": 0.9420314698252165 | |
| }, | |
| { | |
| "epoch": 0.3954802259887006, | |
| "grad_norm": 0.11067840456962585, | |
| "learning_rate": 4.532924091140417e-05, | |
| "loss": 0.18294379711151124, | |
| "step": 70, | |
| "token_acc": 0.9418481147105683 | |
| }, | |
| { | |
| "epoch": 0.4519774011299435, | |
| "grad_norm": 0.09862152487039566, | |
| "learning_rate": 4.395962634373097e-05, | |
| "loss": 0.17243103981018065, | |
| "step": 80, | |
| "token_acc": 0.9446352200693965 | |
| }, | |
| { | |
| "epoch": 0.5084745762711864, | |
| "grad_norm": 0.10678666085004807, | |
| "learning_rate": 4.2440788028374624e-05, | |
| "loss": 0.1731430172920227, | |
| "step": 90, | |
| "token_acc": 0.9447363875815018 | |
| }, | |
| { | |
| "epoch": 0.5649717514124294, | |
| "grad_norm": 0.10532315075397491, | |
| "learning_rate": 4.0784680143198836e-05, | |
| "loss": 0.17281131744384765, | |
| "step": 100, | |
| "token_acc": 0.9447437022704439 | |
| }, | |
| { | |
| "epoch": 0.6214689265536724, | |
| "grad_norm": 0.11211931705474854, | |
| "learning_rate": 3.900433726075865e-05, | |
| "loss": 0.16182489395141603, | |
| "step": 110, | |
| "token_acc": 0.9481675818843257 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.10726941376924515, | |
| "learning_rate": 3.711377175831626e-05, | |
| "loss": 0.16202739477157593, | |
| "step": 120, | |
| "token_acc": 0.9479687034245692 | |
| }, | |
| { | |
| "epoch": 0.7344632768361582, | |
| "grad_norm": 0.10400757193565369, | |
| "learning_rate": 3.512786353194134e-05, | |
| "loss": 0.15924739837646484, | |
| "step": 130, | |
| "token_acc": 0.9486699455285843 | |
| }, | |
| { | |
| "epoch": 0.7909604519774012, | |
| "grad_norm": 0.10787644982337952, | |
| "learning_rate": 3.3062242882712724e-05, | |
| "loss": 0.15439069271087646, | |
| "step": 140, | |
| "token_acc": 0.9498849158473873 | |
| }, | |
| { | |
| "epoch": 0.847457627118644, | |
| "grad_norm": 0.10836105048656464, | |
| "learning_rate": 3.093316749677788e-05, | |
| "loss": 0.15822217464447022, | |
| "step": 150, | |
| "token_acc": 0.9487123526844173 | |
| }, | |
| { | |
| "epoch": 0.903954802259887, | |
| "grad_norm": 0.10872391611337662, | |
| "learning_rate": 2.875739448751176e-05, | |
| "loss": 0.15569958686828614, | |
| "step": 160, | |
| "token_acc": 0.9491473105803542 | |
| }, | |
| { | |
| "epoch": 0.96045197740113, | |
| "grad_norm": 0.11218578368425369, | |
| "learning_rate": 2.655204850688085e-05, | |
| "loss": 0.15528473854064942, | |
| "step": 170, | |
| "token_acc": 0.9494661997922623 | |
| }, | |
| { | |
| "epoch": 1.0169491525423728, | |
| "grad_norm": 0.13003411889076233, | |
| "learning_rate": 2.433448696405563e-05, | |
| "loss": 0.1492830991744995, | |
| "step": 180, | |
| "token_acc": 0.9511860316683133 | |
| }, | |
| { | |
| "epoch": 1.073446327683616, | |
| "grad_norm": 0.12652547657489777, | |
| "learning_rate": 2.2122163412082927e-05, | |
| "loss": 0.14996984004974365, | |
| "step": 190, | |
| "token_acc": 0.9509753894028877 | |
| }, | |
| { | |
| "epoch": 1.1299435028248588, | |
| "grad_norm": 0.11710216104984283, | |
| "learning_rate": 1.993249017784766e-05, | |
| "loss": 0.149368953704834, | |
| "step": 200, | |
| "token_acc": 0.9509840746795515 | |
| }, | |
| { | |
| "epoch": 1.1864406779661016, | |
| "grad_norm": 0.12813611328601837, | |
| "learning_rate": 1.778270131650948e-05, | |
| "loss": 0.1482247829437256, | |
| "step": 210, | |
| "token_acc": 0.9514638991717056 | |
| }, | |
| { | |
| "epoch": 1.2429378531073447, | |
| "grad_norm": 0.12335359305143356, | |
| "learning_rate": 1.5689716969045848e-05, | |
| "loss": 0.14458421468734742, | |
| "step": 220, | |
| "token_acc": 0.9528204997080846 | |
| }, | |
| { | |
| "epoch": 1.2994350282485876, | |
| "grad_norm": 0.1252630650997162, | |
| "learning_rate": 1.3670010190490073e-05, | |
| "loss": 0.14932241439819335, | |
| "step": 230, | |
| "token_acc": 0.950983923940499 | |
| }, | |
| { | |
| "epoch": 1.3559322033898304, | |
| "grad_norm": 0.12602832913398743, | |
| "learning_rate": 1.173947729700644e-05, | |
| "loss": 0.14413282871246338, | |
| "step": 240, | |
| "token_acc": 0.9524894618411184 | |
| }, | |
| { | |
| "epoch": 1.4124293785310735, | |
| "grad_norm": 0.1404254138469696, | |
| "learning_rate": 9.913312752249903e-06, | |
| "loss": 0.14448442459106445, | |
| "step": 250, | |
| "token_acc": 0.9523784878342272 | |
| }, | |
| { | |
| "epoch": 1.4689265536723164, | |
| "grad_norm": 0.1297323852777481, | |
| "learning_rate": 8.20588957773018e-06, | |
| "loss": 0.1442911744117737, | |
| "step": 260, | |
| "token_acc": 0.9526485262065045 | |
| }, | |
| { | |
| "epoch": 1.5254237288135593, | |
| "grad_norm": 0.1164567619562149, | |
| "learning_rate": 6.6306462284233234e-06, | |
| "loss": 0.1476944088935852, | |
| "step": 270, | |
| "token_acc": 0.9516979818914234 | |
| }, | |
| { | |
| "epoch": 1.5819209039548023, | |
| "grad_norm": 0.12876106798648834, | |
| "learning_rate": 5.199980823988157e-06, | |
| "loss": 0.14429720640182495, | |
| "step": 280, | |
| "token_acc": 0.9527345847326476 | |
| }, | |
| { | |
| "epoch": 1.6384180790960452, | |
| "grad_norm": 0.12522290647029877, | |
| "learning_rate": 3.925153568052123e-06, | |
| "loss": 0.14247846603393555, | |
| "step": 290, | |
| "token_acc": 0.952577761791889 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "grad_norm": 0.1215895265340805, | |
| "learning_rate": 2.8161981235857143e-06, | |
| "loss": 0.14371044635772706, | |
| "step": 300, | |
| "token_acc": 0.9530261029770724 | |
| }, | |
| { | |
| "epoch": 1.7514124293785311, | |
| "grad_norm": 0.13429652154445648, | |
| "learning_rate": 1.881842641895104e-06, | |
| "loss": 0.1435616970062256, | |
| "step": 310, | |
| "token_acc": 0.9526384206465796 | |
| }, | |
| { | |
| "epoch": 1.807909604519774, | |
| "grad_norm": 0.12633894383907318, | |
| "learning_rate": 1.129441066782702e-06, | |
| "loss": 0.14912809133529664, | |
| "step": 320, | |
| "token_acc": 0.9509201261393581 | |
| }, | |
| { | |
| "epoch": 1.8644067796610169, | |
| "grad_norm": 0.12346093356609344, | |
| "learning_rate": 5.649152545533332e-07, | |
| "loss": 0.14490561485290526, | |
| "step": 330, | |
| "token_acc": 0.9519967728922952 | |
| }, | |
| { | |
| "epoch": 1.92090395480226, | |
| "grad_norm": 0.123548224568367, | |
| "learning_rate": 1.927083654168854e-07, | |
| "loss": 0.14034559726715087, | |
| "step": 340, | |
| "token_acc": 0.9537333066731213 | |
| }, | |
| { | |
| "epoch": 1.9774011299435028, | |
| "grad_norm": 0.1290796846151352, | |
| "learning_rate": 1.5749893125160954e-08, | |
| "loss": 0.14156577587127686, | |
| "step": 350, | |
| "token_acc": 0.9530219643471618 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 354, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.089589125686231e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |