Text Generation
Transformers
ONNX
Safetensors
English
qwen2
dictation
cleanup
transcript
lora
mumble
conversational
text-generation-inference
Instructions to use adikuma/mumble-cleanup with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use adikuma/mumble-cleanup with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="adikuma/mumble-cleanup")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("adikuma/mumble-cleanup")
model = AutoModelForCausalLM.from_pretrained("adikuma/mumble-cleanup")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use adikuma/mumble-cleanup with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "adikuma/mumble-cleanup"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "adikuma/mumble-cleanup",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'

Use Docker
docker model run hf.co/adikuma/mumble-cleanup
- SGLang
How to use adikuma/mumble-cleanup with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "adikuma/mumble-cleanup" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "adikuma/mumble-cleanup",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'

Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "adikuma/mumble-cleanup" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "adikuma/mumble-cleanup",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use adikuma/mumble-cleanup with Docker Model Runner:
docker model run hf.co/adikuma/mumble-cleanup
# push the trained cleanup model to the huggingface hub.
# uploads the merged transformers model + fp32 onnx + int8 onnx + model card
# in one commit. needs HF_TOKEN in .env.local (gitignored).
#
# usage: uv run python scripts/push_to_hub.py --run-id r1
# by default the repo is private. pass --public to flip it.
| import argparse | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from huggingface_hub import CommitOperationAdd, HfApi | |
# names of the files taken from runs/<run-id>/merged. each one must exist
# before anything is uploaded, and each lands in the repo root under the
# same name.
MODEL_FILES = [
    "config.json", "model.safetensors",
    "tokenizer.json", "tokenizer_config.json", "special_tokens_map.json",
    "vocab.json", "merges.txt",
]
def _load_dotenv(path: Path) -> None:
    """Load KEY=VALUE pairs from a dotenv-style file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Each remaining line is split on the first ``=``; key and value
    are stripped of surrounding whitespace, and one pair of matching single
    or double quotes around the value is removed. Variables that are already
    set in the environment are left untouched.

    Does nothing when ``path`` is not an existing regular file.
    """
    # is_file() rather than exists(): a directory with this name would
    # otherwise make read_text() raise.
    if not path.is_file():
        return

    for raw_line in path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue

        key, _, value = entry.partition("=")
        key = key.strip()
        value = value.strip()

        # remove exactly one matching pair of surrounding quotes. stripping
        # every leading/trailing quote character would corrupt values that
        # legitimately start or end with one, e.g. "say 'hi'".
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]

        if key:
            # the real environment wins over the file
            os.environ.setdefault(key, value)
def main() -> None:
    """Publish one training run's artifacts to the Hugging Face Hub.

    Parses the command line, reads HF_TOKEN (from the environment, falling
    back to .env.local), verifies that every required file exists, creates
    the repo if needed and uploads everything in a single commit.

    Required files: every entry of MODEL_FILES under
    <runs-dir>/<run-id>/merged, the fp32 onnx export and the model card.
    The int8 onnx export is optional: when it is absent a warning is printed,
    it is left out of the upload and the commit message does not mention it.

    Exits with status 1 when HF_TOKEN is unset or a required file is missing.
    """
    parser = argparse.ArgumentParser(description="publish cleanup model to the hf hub")
    parser.add_argument("--runs-dir", default="runs")
    parser.add_argument("--run-id", required=True)
    parser.add_argument("--repo", default="adikuma/mumble-cleanup")
    parser.add_argument("--card", default="docs/model_card.md")
    parser.add_argument("--public", action="store_true", help="make the repo public")
    args = parser.parse_args()

    _load_dotenv(Path(".env.local"))
    token = os.environ.get("HF_TOKEN")
    if not token:
        print("error: HF_TOKEN not set. add it to .env.local", file=sys.stderr)
        sys.exit(1)

    run_dir = Path(args.runs_dir) / args.run_id
    merged_dir = run_dir / "merged"
    fp32_onnx = run_dir / "onnx" / "model.onnx"
    int8_onnx = run_dir / "onnx" / "int8" / "model.onnx"
    card_path = Path(args.card)

    # collect every missing required file first so the user sees them all
    # in one go instead of fixing them one at a time.
    missing = [str(merged_dir / n) for n in MODEL_FILES if not (merged_dir / n).exists()]
    if not fp32_onnx.exists():
        missing.append(str(fp32_onnx))
    if not card_path.exists():
        missing.append(str(card_path))

    # decide once whether int8 is part of this upload, so the warning, the
    # file list and the commit message cannot disagree with each other.
    has_int8 = int8_onnx.exists()
    if not has_int8:
        # int8 is optional but warn loudly
        print(f"warn: no int8 onnx at {int8_onnx}; uploading fp32 only", file=sys.stderr)

    if missing:
        print("error: missing files:", file=sys.stderr)
        for path in missing:
            print(f"  {path}", file=sys.stderr)
        sys.exit(1)

    api = HfApi(token=token)
    private = not args.public
    print(f"[hub] creating repo {args.repo} (private={private})")
    # NOTE(review): with exist_ok=True an already-existing repo presumably
    # keeps its current visibility, so --public may only take effect on first
    # creation. confirm against the huggingface_hub docs.
    api.create_repo(args.repo, repo_type="model", private=private, exist_ok=True)

    # repo path -> local path. every MODEL_FILES entry was validated above,
    # so none of them is skipped here.
    uploads = {"README.md": card_path}
    uploads.update((name, merged_dir / name) for name in MODEL_FILES)
    uploads["onnx/model.onnx"] = fp32_onnx
    if has_int8:
        uploads["onnx/int8/model.onnx"] = int8_onnx

    total_mb = sum(p.stat().st_size for p in uploads.values()) / 1e6
    print(f"[hub] uploading {len(uploads)} files, {total_mb:.0f} MB")

    operations = [
        CommitOperationAdd(path_in_repo=repo_path, path_or_fileobj=str(local_path))
        for repo_path, local_path in uploads.items()
    ]
    # only claim int8 in the commit message when it is actually uploaded
    onnx_desc = "fp32 + int8 onnx" if has_int8 else "fp32 onnx"
    api.create_commit(
        repo_id=args.repo,
        repo_type="model",
        operations=operations,
        commit_message=f"add mumble cleanup model, {onnx_desc}, model card",
    )
    print()
    print(f"[hub] done: https://huggingface.co/{args.repo}")
# run the publish flow only when executed as a script, not on import.
if __name__ == "__main__":
    main()