Text Generation
Transformers
PEFT
llama
disaster-management
emergency-response
humanitarian-ai
multilingual
fine-tuned
qlora
lora
llama3
conversational
4-bit precision
bitsandbytes
Instructions to use drdeveloper88/WorldDisasterLM-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use drdeveloper88/WorldDisasterLM-8B with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="drdeveloper88/WorldDisasterLM-8B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("drdeveloper88/WorldDisasterLM-8B")
model = AutoModelForCausalLM.from_pretrained("drdeveloper88/WorldDisasterLM-8B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- PEFT
How to use drdeveloper88/WorldDisasterLM-8B with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use drdeveloper88/WorldDisasterLM-8B with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "drdeveloper88/WorldDisasterLM-8B"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "drdeveloper88/WorldDisasterLM-8B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/drdeveloper88/WorldDisasterLM-8B
- SGLang
How to use drdeveloper88/WorldDisasterLM-8B with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "drdeveloper88/WorldDisasterLM-8B" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "drdeveloper88/WorldDisasterLM-8B", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "drdeveloper88/WorldDisasterLM-8B" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "drdeveloper88/WorldDisasterLM-8B", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use drdeveloper88/WorldDisasterLM-8B with Docker Model Runner:
docker model run hf.co/drdeveloper88/WorldDisasterLM-8B
Upload WorldDisasterLM-8B source code: FastAPI backend, training pipeline, 11-language support
495526b | """ | |
| GGUF conversion script for WorldDisasterLM. | |
| Converts the merged Hugging Face model to GGUF format for CPU inference | |
| and mobile deployment using llama.cpp. | |
| Quantization sizes (approximate for 8B model) | |
| ----------------------------------------------- | |
| Q4_K_M → ~4.8 GB (recommended for most use cases) | |
| Q5_K_M → ~5.6 GB (better quality) | |
| Q8_0 → ~8.5 GB (highest quality, slower) | |
| f16 → ~15 GB (unquantized, 16-bit half precision) | |
| Usage | |
| ----- | |
| # Full automated flow (requires llama.cpp cloned alongside this repo) | |
| python scripts/convert_gguf.py \\ | |
| --model-path checkpoints/worlddisasterlm-merged \\ | |
| --llama-cpp-path ../llama.cpp \\ | |
| --quant Q4_K_M | |
| # Manual steps are printed if llama.cpp is not found | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import logging | |
| import shutil | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
| logger = logging.getLogger(__name__) | |
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the GGUF conversion script.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, so a plain
            ``parse_args()`` call behaves exactly as before.

    Returns:
        A namespace with the attributes ``model_path``, ``output_dir``,
        ``llama_cpp_path`` and ``quant``.
    """
    parser = argparse.ArgumentParser(description="Convert WorldDisasterLM to GGUF")
    parser.add_argument(
        "--model-path",
        default="checkpoints/worlddisasterlm-merged",
        help="Path to merged HF model",
    )
    parser.add_argument(
        "--output-dir",
        default="artifacts",
        help="Output directory for GGUF files",
    )
    parser.add_argument(
        "--llama-cpp-path",
        default="../llama.cpp",
        help="Path to llama.cpp repo",
    )
    parser.add_argument(
        "--quant",
        default="Q4_K_M",
        choices=["Q4_K_M", "Q5_K_M", "Q8_0", "f16"],
        help="Quantization type",
    )
    # Passing argv explicitly keeps the function usable from other code and
    # from tests without touching the process-wide sys.argv.
    return parser.parse_args(argv)
def print_manual_steps(model_path: str, output_dir: str, quant: str) -> None:
    """Print the shell commands needed to do the GGUF conversion by hand.

    The steps cover building llama.cpp, installing its Python requirements,
    converting the Hugging Face model to an f16 GGUF, quantizing it and
    uploading the result.

    Args:
        model_path: Path to the merged Hugging Face model, echoed into the
            conversion command.
        output_dir: Directory the printed commands write GGUF files into.
        quant: Target quantization type (for example ``"Q4_K_M"``). For
            ``"f16"`` the quantization step is omitted: the f16 file from
            the conversion step already is the final artifact, and the
            quantize command would otherwise use the same path as both
            input and output.
    """
    rule = "=" * 70
    f16_path = f"{output_dir}/worlddisasterlm_f16.gguf"
    final_name = f"worlddisasterlm_{quant.lower()}.gguf"
    needs_quantize = quant.lower() != "f16"

    print("\n" + rule)
    print("MANUAL GGUF CONVERSION STEPS")
    print(rule)
    print("\nStep 1: Clone llama.cpp and build")
    print(" git clone https://github.com/ggerganov/llama.cpp")
    print(" cd llama.cpp")
    # llama.cpp trees that ship convert_hf_to_gguf.py / llama-quantize use the
    # GGML_-prefixed CMake options; the LLAMA_BLAS names are the old spelling.
    print(" cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS")
    print(" cmake --build build --config Release")
    print()
    print("Step 2: Install Python dependencies")
    print(" pip install -r llama.cpp/requirements.txt")
    print()
    print("Step 3: Convert HF model to GGUF (f16)")
    print(f" python llama.cpp/convert_hf_to_gguf.py {model_path} \\")
    print(f" --outtype f16 --outfile {f16_path}")
    print()
    if needs_quantize:
        print(f"Step 4: Quantize to {quant}")
        print(" ./llama.cpp/build/bin/llama-quantize \\")
        print(f" {f16_path} \\")
        print(f" {output_dir}/{final_name} \\")
        print(f" {quant}")
    else:
        print("Step 4: Quantize (not needed for f16)")
        print(" The f16 file produced in Step 3 is the final artifact.")
    print()
    print("Step 5: Upload GGUF to Hugging Face")
    print(" huggingface-cli upload YourUsername/WorldDisasterLM-GGUF \\")
    print(f" {output_dir}/{final_name} \\")
    print(f" {final_name}")
    print(rule + "\n")
def run_conversion(model_path: str, llama_cpp_path: str, output_dir: str, quant: str) -> None:
    """Convert the merged HF model to GGUF and optionally quantize it.

    The model is first converted to an f16 GGUF with llama.cpp's
    ``convert_hf_to_gguf.py`` and then, unless ``quant`` is ``"f16"``,
    quantized with the ``llama-quantize`` binary.

    Args:
        model_path: Path to the merged Hugging Face model directory.
        llama_cpp_path: Path to a llama.cpp checkout.
        output_dir: Directory for the GGUF files; created if missing.
        quant: Target quantization type passed to ``llama-quantize``.

    Raises:
        SystemExit: With code 1 when ``convert_hf_to_gguf.py`` is not present
            in ``llama_cpp_path`` (manual steps are printed first).
        subprocess.CalledProcessError: When the conversion script or the
            quantize binary exits with a non-zero status.
    """
    llama_dir = Path(llama_cpp_path).resolve()
    model_dir = Path(model_path).resolve()
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    convert_script = llama_dir / "convert_hf_to_gguf.py"
    if not convert_script.exists():
        logger.error("convert_hf_to_gguf.py not found in %s", llama_dir)
        print_manual_steps(model_path, output_dir, quant)
        sys.exit(1)

    f16_gguf = out_dir / "worlddisasterlm_f16.gguf"

    # Convert to f16 GGUF
    logger.info("Converting HF model to f16 GGUF …")
    subprocess.run(
        [sys.executable, str(convert_script), str(model_dir), "--outtype", "f16", "--outfile", str(f16_gguf)],
        check=True,
    )

    # For f16 the converted file already is the requested artifact. Running
    # llama-quantize here would use the same path as input and output.
    if quant.lower() == "f16":
        logger.info("GGUF model saved to %s", f16_gguf)
        logger.info("Upload with: huggingface-cli upload <repo_id> %s", f16_gguf)
        return

    quant_gguf = out_dir / f"worlddisasterlm_{quant.lower()}.gguf"

    # Find quantize binary: Unix layout first, then the Windows layouts
    # (single-config generators and the MSVC "Release" sub-directory).
    bin_dir = llama_dir / "build" / "bin"
    candidates = (
        bin_dir / "llama-quantize",
        bin_dir / "llama-quantize.exe",
        bin_dir / "Release" / "llama-quantize.exe",
    )
    q_bin = next((candidate for candidate in candidates if candidate.exists()), None)
    if q_bin is None:
        logger.warning("llama-quantize binary not found. f16 GGUF saved at %s", f16_gguf)
        print_manual_steps(model_path, output_dir, quant)
        return

    # Quantize
    logger.info("Quantizing to %s …", quant)
    subprocess.run([str(q_bin), str(f16_gguf), str(quant_gguf), quant], check=True)
    logger.info("GGUF model saved to %s", quant_gguf)
    logger.info("Upload with: huggingface-cli upload <repo_id> %s", quant_gguf)
def main() -> None:
    """Script entry point: configure logging, parse options, run the conversion.

    When the llama.cpp directory given on the command line does not exist,
    a warning is logged and the manual conversion steps are printed instead
    of attempting the automated flow.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(levelname)s | %(message)s",
    )
    options = parse_args()

    if Path(options.llama_cpp_path).exists():
        run_conversion(
            model_path=options.model_path,
            llama_cpp_path=options.llama_cpp_path,
            output_dir=options.output_dir,
            quant=options.quant,
        )
        return

    # No llama.cpp checkout available: fall back to printed instructions.
    logger.warning("llama.cpp directory not found at %s — printing manual steps.", options.llama_cpp_path)
    print_manual_steps(options.model_path, options.output_dir, options.quant)


if __name__ == "__main__":
    main()