"""
Hugging Face Hub push script.

Merges LoRA adapters into the base model, creates the model card,
and pushes everything to the Hub under your account namespace.

Usage
-----
python scripts/push_to_hub.py \\
    --adapter checkpoints/worlddisasterlm-qlora \\
    --base-model meta-llama/Llama-3.1-8B-Instruct \\
    --repo-id YourHFUsername/WorldDisasterLM-8B

Requirements
------------
export HF_TOKEN=hf_xxxx
pip install transformers peft huggingface_hub
"""

from __future__ import annotations

import argparse
import logging
import os
import tempfile
from pathlib import Path

logger = logging.getLogger(__name__)

# Template for the README.md uploaded next to the weights. The placeholder
# "YOUR_HF_USERNAME/WorldDisasterLM-8B" is replaced with the target repo ID
# by render_model_card() before upload.
HF_MODEL_CARD = """---
language:
- en
- ne
- es
- fr
- ar
- hi
- te
- zh
- ja
- ko
- pt
license: llama3
base_model: meta-llama/Llama-3.1-8B-Instruct
tags:
- disaster-management
- emergency-response
- humanitarian-ai
- fine-tuned
- qlora
- lora
- peft
pipeline_tag: text-generation
library_name: transformers
model-index:
- name: WorldDisasterLM-8B
  results: []
---

# WorldDisasterLM — Open Foundation Model for Global Disaster Intelligence

WorldDisasterLM is an instruction-tuned large language model built on top of **Llama 3.1 8B Instruct**,
domain-adapted on global disaster data from ReliefWeb, USGS, NOAA, GDACS, OpenFEMA, and WHO.

## Model Details

| Property | Value |
|---|---|
| Base model | meta-llama/Llama-3.1-8B-Instruct |
| Training method | QLoRA (4-bit NF4 quantization, LoRA r=16) |
| Languages | EN, ES, FR, AR, HI, TE, ZH, JA, KO, PT |
| Domain | Disaster management, humanitarian response, risk intelligence |
| License | Llama 3 Community License (see Meta's terms) |

## Quick Start

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "YOUR_HF_USERNAME/WorldDisasterLM-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {
        "role": "system",
        "content": "You are WorldDisasterLM, an expert in disaster management and emergency response.",
    },
    {"role": "user", "content": "What should I do immediately after an earthquake?"},
]
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
outputs = model.generate(inputs.to(model.device), max_new_tokens=512, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

## Training Data

Collected from free, publicly accessible sources:

- **ReliefWeb** — humanitarian reports and disaster assessments
- **USGS** — earthquake catalog (magnitude ≥4.0, 10-year archive)
- **NOAA** — weather alerts and severe weather events
- **GDACS** — global disaster alert coordination events
- **OpenFEMA** — US federal disaster declarations
- **WHO** — disease outbreak news and public health alerts

Each raw record was expanded into 8 instruction-following QA variants (immediate response,
resource planning, risk assessment, public communication, recovery planning, multilingual
guidance) for a multi-hundred-thousand sample corpus.

## Intended Use

- Emergency operations centers
- Government disaster management agencies
- NGOs and humanitarian organizations
- Public health authorities
- Researchers in disaster risk reduction
- Community preparedness applications
- Citizens seeking emergency guidance

## Safety and Limitations

- **Not a substitute** for real-time emergency management systems or official orders.
- Always verify critical operational decisions with local emergency authorities.
- Model outputs should be reviewed by trained emergency professionals for life-safety decisions.
- Some low-resource languages may have lower quality responses.
- Training data may not reflect the most recent real-time events.

## Citation

```bibtex
@misc{worlddisasterlm2026,
  title  = {WorldDisasterLM: An Open Foundation Model for Global Disaster Management},
  year   = {2026},
  url    = {https://huggingface.co/YOUR_HF_USERNAME/WorldDisasterLM-8B}
}
```
"""

# Full "<namespace>/<name>" placeholder used inside HF_MODEL_CARD.
_CARD_REPO_PLACEHOLDER = "YOUR_HF_USERNAME/WorldDisasterLM-8B"


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the push script.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour of the plain
            ``parse_args()`` call.

    Returns:
        Namespace with ``adapter``, ``base_model``, ``repo_id``, ``private``
        and ``push_dtype`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Merge LoRA adapters and push WorldDisasterLM to Hugging Face Hub"
    )
    parser.add_argument(
        "--adapter",
        default="checkpoints/worlddisasterlm-qlora",
        help="Path to LoRA adapter checkpoint",
    )
    parser.add_argument(
        "--base-model",
        default="meta-llama/Llama-3.1-8B-Instruct",
        help="Base model ID",
    )
    parser.add_argument(
        "--repo-id",
        required=True,
        help="HF repo ID, e.g. YourUsername/WorldDisasterLM-8B",
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Create as private repo (default: public)",
    )
    parser.add_argument(
        "--push-dtype",
        choices=["bfloat16", "float16", "float32"],
        default="bfloat16",
    )
    return parser.parse_args(argv)


def render_model_card(repo_id: str) -> str:
    """Return the model card text with the repo placeholder filled in.

    The whole ``YOUR_HF_USERNAME/WorldDisasterLM-8B`` placeholder is replaced
    by *repo_id*, so the Quick Start snippet and the citation URL point at
    the repository that is actually being pushed, whatever its name.
    """
    return HF_MODEL_CARD.replace(_CARD_REPO_PLACEHOLDER, repo_id)


def _upload_model_card(api, repo_id: str, token: str) -> None:
    """Write the rendered model card to a temp file and upload it as README.md."""
    with tempfile.NamedTemporaryFile(
        "w", suffix=".md", delete=False, encoding="utf-8"
    ) as tf:
        tf.write(render_model_card(repo_id))
        tmp_card_path = Path(tf.name)
    try:
        api.upload_file(
            path_or_fileobj=str(tmp_card_path),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="model",
            token=token,
        )
    finally:
        # delete=False above means nothing else removes the file; clean up
        # even when the upload raises.
        tmp_card_path.unlink(missing_ok=True)


def merge_and_push(
    adapter_path: str,
    base_model_id: str,
    repo_id: str,
    private: bool,
    push_dtype: str,
) -> None:
    """Merge a LoRA adapter into its base model and publish the result.

    Steps: create (or reuse) the Hub repo, load the tokenizer from the
    adapter directory, load the base model, apply and merge the adapter,
    push the merged weights and tokenizer, then upload the model card as
    ``README.md``.

    Args:
        adapter_path: Path to the LoRA adapter checkpoint; the tokenizer is
            loaded from the same location.
        base_model_id: Model ID of the base model the adapter applies to.
        repo_id: Target Hub repository ID.
        private: Whether the repository is created as private.
        push_dtype: One of ``"bfloat16"``, ``"float16"``, ``"float32"``;
            dtype the base model is loaded in before merging.

    Raises:
        SystemExit: If the ``HF_TOKEN`` environment variable is missing or empty.
        KeyError: If *push_dtype* is not one of the supported names.
    """
    # Check the token before the heavy imports so a missing credential
    # fails immediately instead of after loading torch/transformers.
    token = os.getenv("HF_TOKEN")
    if not token:
        raise SystemExit(
            "HF_TOKEN environment variable not set. Run: export HF_TOKEN=hf_xxxx"
        )

    # Imported lazily so the module (and --help) works without these installed.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel
    from huggingface_hub import HfApi, create_repo

    dtype_map = {
        "bfloat16": torch.bfloat16,
        "float16": torch.float16,
        "float32": torch.float32,
    }
    torch_dtype = dtype_map[push_dtype]

    api = HfApi(token=token)

    logger.info("Creating or verifying repo: %s", repo_id)
    create_repo(
        repo_id=repo_id,
        token=token,
        private=private,
        repo_type="model",
        exist_ok=True,
    )

    # NOTE(review): trust_remote_code=True lets the checkpoint/repo execute
    # its own Python code on load. Only point this script at adapters and
    # base models you trust.
    logger.info("Loading tokenizer from adapter path: %s", adapter_path)
    tokenizer = AutoTokenizer.from_pretrained(adapter_path, trust_remote_code=True)

    logger.info("Loading base model: %s", base_model_id)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch_dtype,
        device_map="auto",
        trust_remote_code=True,
    )

    logger.info("Loading LoRA adapter from: %s", adapter_path)
    peft_model = PeftModel.from_pretrained(base_model, adapter_path)

    logger.info("Merging LoRA weights into base model …")
    merged_model = peft_model.merge_and_unload()
    merged_model.config.use_cache = True

    logger.info("Pushing merged model to %s …", repo_id)
    merged_model.push_to_hub(repo_id, token=token, safe_serialization=True)
    tokenizer.push_to_hub(repo_id, token=token)

    # Upload the model card last so it is the README that ends up in the repo.
    _upload_model_card(api, repo_id, token)

    logger.info("Done! Model published at: https://huggingface.co/%s", repo_id)
    logger.info("Tag your model as free-to-use by setting the license in the repo settings.")


def main(argv: list[str] | None = None) -> None:
    """Script entry point: configure logging, parse options, merge and push.

    Args:
        argv: Optional argument list; ``None`` uses the process arguments.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(levelname)s | %(message)s",
    )
    args = parse_args(argv)
    merge_and_push(
        adapter_path=args.adapter,
        base_model_id=args.base_model,
        repo_id=args.repo_id,
        private=args.private,
        push_dtype=args.push_dtype,
    )


if __name__ == "__main__":
    main()