| LICENSE |
| README.md |
| setup.py |
| src/llmtuner/__init__.py |
| src/llmtuner.egg-info/PKG-INFO |
| src/llmtuner.egg-info/SOURCES.txt |
| src/llmtuner.egg-info/dependency_links.txt |
| src/llmtuner.egg-info/requires.txt |
| src/llmtuner.egg-info/top_level.txt |
| src/llmtuner/compression/__init__.py |
| src/llmtuner/compression/tuner.py |
| src/llmtuner/compression/utils.py |
| src/llmtuner/compression/prune/__init__.py |
| src/llmtuner/compression/prune/block_drop.py |
| src/llmtuner/compression/prune/io.py |
| src/llmtuner/compression/prune/layer_drop.py |
| src/llmtuner/compression/prune/utils.py |
| src/llmtuner/compression/prune/workflow.py |
| src/llmtuner/compression/prune/wrapper.py |
| src/llmtuner/compression/prune/models/__init__.py |
| src/llmtuner/compression/prune/models/configuration_deepseek.py |
| src/llmtuner/compression/prune/models/configuration_dropped_baichuan.py |
| src/llmtuner/compression/prune/models/configuration_dropped_gemma2.py |
| src/llmtuner/compression/prune/models/configuration_dropped_llama.py |
| src/llmtuner/compression/prune/models/configuration_dropped_mistral.py |
| src/llmtuner/compression/prune/models/modeling_dropped_baichuan.py |
| src/llmtuner/compression/prune/models/modeling_dropped_deepseek.py |
| src/llmtuner/compression/prune/models/modeling_dropped_gemma2.py |
| src/llmtuner/compression/prune/models/modeling_dropped_llama.py |
| src/llmtuner/compression/prune/models/modeling_dropped_mistral.py |
| src/llmtuner/compression/quantization/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/quantize.py |
| src/llmtuner/compression/quantization/AutoAWQ/setup.py |
| src/llmtuner/compression/quantization/AutoAWQ/AutoAWQ_kernels/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/AutoAWQ_kernels/setup.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/evaluation/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/evaluation/eval_utils.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/evaluation/humaneval_utils.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/evaluation/kl_divergence.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/_config.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/aquila.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/auto.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/baichuan.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/base.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/bloom.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/deepseek.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/falcon.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/gemma.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/gpt_bigcode.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/gpt_neox.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/gptj.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/llama.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/llava.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/mistral.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/mixtral.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/mpt.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/opt.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/qwen.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/qwen2.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/stablelm.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/starcoder2.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/yi.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/deepseek_moe/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/deepseek_moe/configuration_deepseek.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/models/deepseek_moe/modeling_deepseek.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/act.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/attn.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/block.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/cache.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/mlp.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/model.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/moe.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/fused/norm.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/linear/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/linear/exllama.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/linear/exllamav2.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/linear/gemm.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/linear/gemv.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/linear/gemv_fast.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/modules/linear/marlin.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/quantize/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/quantize/quantizer.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/quantize/scale.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/__init__.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/calib_data.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/fused_utils.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/module.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/packing_utils.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/parallel.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/quant_utils.py |
| src/llmtuner/compression/quantization/AutoAWQ/awq/utils/utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/datautils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/gptq.py |
| src/llmtuner/compression/quantization/AutoGPTQ/modelutils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/quant.py |
| src/llmtuner/compression/quantization/AutoGPTQ/quantize.py |
| src/llmtuner/compression/quantization/AutoGPTQ/setup_cuda.py |
| src/llmtuner/compression/quantization/AutoGPTQ/test_kernel.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/_base.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/language_modeling_task.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/sequence_classification_task.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/text_summarization_task.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/_utils/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/_utils/classification_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/eval_tasks/_utils/generation_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/_base.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/_const.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/auto.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/baichuan.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/bloom.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/codegen.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/decilm.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/deepseek.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/gemma.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/gpt2.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/gpt_bigcode.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/gpt_neox.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/gptj.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/internlm.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/llama.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/longllama.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/mistral.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/mixtral.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/moss.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/opt.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/qwen.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/qwen2.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/rw.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/stablelmepoch.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/xverse.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/modeling/yi.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/_fused_base.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/fused_gptj_attn.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/fused_llama_attn.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/fused_llama_mlp.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/qlinear_cuda.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/qlinear_cuda_old.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/qlinear_exllama.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/qlinear_exllamav2.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/qlinear_marlin.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/qlinear_qigen.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/qlinear/qlinear_triton.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/triton_utils/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/triton_utils/custom_autotune.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/triton_utils/kernels.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/nn_modules/triton_utils/mixin.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/quantization/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/quantization/gptq.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/quantization/quantizer.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/__init__.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/data_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/exllama_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/import_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/marlin_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/modeling_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/peft_utils.py |
| src/llmtuner/compression/quantization/AutoGPTQ/auto_gptq/utils/perplexity_utils.py |
| src/llmtuner/data/__init__.py |
| src/llmtuner/data/aligner.py |
| src/llmtuner/data/formatter.py |
| src/llmtuner/data/loader.py |
| src/llmtuner/data/parser.py |
| src/llmtuner/data/preprocess.py |
| src/llmtuner/data/template.py |
| src/llmtuner/data/test_data.py |
| src/llmtuner/data/utils.py |
| src/llmtuner/extras/__init__.py |
| src/llmtuner/extras/callbacks.py |
| src/llmtuner/extras/constants.py |
| src/llmtuner/extras/logging.py |
| src/llmtuner/extras/misc.py |
| src/llmtuner/extras/packages.py |
| src/llmtuner/extras/ploting.py |
| src/llmtuner/extras/patches/__init__.py |
| src/llmtuner/extras/patches/llama_patch.py |
| src/llmtuner/extras/patches/mixtral_patch.py |
| src/llmtuner/hparams/__init__.py |
| src/llmtuner/hparams/data_args.py |
| src/llmtuner/hparams/evaluation_args.py |
| src/llmtuner/hparams/finetuning_args.py |
| src/llmtuner/hparams/generating_args.py |
| src/llmtuner/hparams/model_args.py |
| src/llmtuner/hparams/parser.py |
| src/llmtuner/hparams/pruning_args.py |
| src/llmtuner/model/__init__.py |
| src/llmtuner/model/adapter.py |
| src/llmtuner/model/loader.py |
| src/llmtuner/model/patcher.py |
| src/llmtuner/model/utils.py |