"""
Hugging Face Hub push script.

Merges LoRA adapters into the base model, creates the model card, and
pushes everything to the Hub under your account namespace.

Usage
-----
python scripts/push_to_hub.py \\
    --adapter checkpoints/worlddisasterlm-qlora \\
    --base-model meta-llama/Llama-3.1-8B-Instruct \\
    --repo-id YourHFUsername/WorldDisasterLM-8B

Requirements
------------
  export HF_TOKEN=hf_xxxx
  pip install torch transformers peft accelerate huggingface_hub
"""

from __future__ import annotations

import argparse
import logging
import os
from pathlib import Path
import tempfile

# Module-level logger; level and format are configured in main().
logger = logging.getLogger(__name__)


# README.md template uploaded alongside the merged model.
# The leading YAML block is Hub model-card metadata; the rest is Markdown.
# `YOUR_HF_USERNAME` is a placeholder that merge_and_push() substitutes
# before the card is uploaded.
HF_MODEL_CARD = """---
language:
- en
- ne
- es
- fr
- ar
- hi
- te
- zh
- ja
- ko
- pt
license: llama3.1
base_model: meta-llama/Llama-3.1-8B-Instruct
tags:
- disaster-management
- emergency-response
- humanitarian-ai
- fine-tuned
- qlora
- lora
- peft
pipeline_tag: text-generation
library_name: transformers
model-index:
- name: WorldDisasterLM-8B
  results: []
---

# WorldDisasterLM — Open Foundation Model for Global Disaster Intelligence

WorldDisasterLM is an instruction-tuned large language model built on top of
**Llama 3.1 8B Instruct**, domain-adapted on global disaster data from
ReliefWeb, USGS, NOAA, GDACS, OpenFEMA, and WHO.

## Model Details

| Property | Value |
|---|---|
| Base model | meta-llama/Llama-3.1-8B-Instruct |
| Training method | QLoRA (4-bit NF4 quantization, LoRA r=16) |
| Languages | EN, NE, ES, FR, AR, HI, TE, ZH, JA, KO, PT |
| Domain | Disaster management, humanitarian response, risk intelligence |
| License | Llama 3.1 Community License (see Meta's terms) |

## Quick Start

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "YOUR_HF_USERNAME/WorldDisasterLM-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {
        "role": "system",
        "content": "You are WorldDisasterLM, an expert in disaster management and emergency response.",
    },
    {"role": "user", "content": "What should I do immediately after an earthquake?"},
]
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
outputs = model.generate(inputs.to(model.device), max_new_tokens=512, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

## Training Data

Collected from free, publicly accessible sources:
- **ReliefWeb** — humanitarian reports and disaster assessments
- **USGS** — earthquake catalog (magnitude ≥4.0, 10-year archive)
- **NOAA** — weather alerts and severe weather events
- **GDACS** — global disaster alert coordination events
- **OpenFEMA** — US federal disaster declarations
- **WHO** — disease outbreak news and public health alerts

Each raw record was expanded into 8 instruction-following QA variants
(immediate response, resource planning, risk assessment, public communication,
recovery planning, multilingual guidance) for a multi-hundred-thousand sample corpus.

## Intended Use

- Emergency operations centers
- Government disaster management agencies
- NGOs and humanitarian organizations
- Public health authorities
- Researchers in disaster risk reduction
- Community preparedness applications
- Citizens seeking emergency guidance

## Safety and Limitations

- **Not a substitute** for real-time emergency management systems or official orders.
- Always verify critical operational decisions with local emergency authorities.
- Model outputs should be reviewed by trained emergency professionals for life-safety decisions.
- Some low-resource languages may have lower quality responses.
- Training data may not reflect the most recent real-time events.

## Citation

```bibtex
@misc{worlddisasterlm2026,
  title  = {WorldDisasterLM: An Open Foundation Model for Global Disaster Management},
  year   = {2026},
  url    = {https://huggingface.co/YOUR_HF_USERNAME/WorldDisasterLM-8B}
}
```
"""


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for the push script.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses;
            passing an explicit list allows programmatic use.

    Returns:
        Namespace with ``adapter``, ``base_model``, ``repo_id``, ``private``
        and ``push_dtype`` attributes.

    Raises:
        SystemExit: Raised by argparse on invalid or missing arguments
            (``--repo-id`` is required).
    """
    parser = argparse.ArgumentParser(description="Merge LoRA adapters and push WorldDisasterLM to Hugging Face Hub")
    parser.add_argument("--adapter", default="checkpoints/worlddisasterlm-qlora", help="Path to LoRA adapter checkpoint")
    parser.add_argument("--base-model", default="meta-llama/Llama-3.1-8B-Instruct", help="Base model ID")
    parser.add_argument("--repo-id", required=True, help="HF repo ID, e.g. YourUsername/WorldDisasterLM-8B")
    parser.add_argument("--private", action="store_true", help="Create as private repo (default: public)")
    parser.add_argument(
        "--push-dtype",
        choices=["bfloat16", "float16", "float32"],
        default="bfloat16",
        help="Dtype the base model is loaded in before merging and pushing (default: bfloat16)",
    )
    return parser.parse_args(argv)


def _render_model_card(template: str, repo_id: str) -> str:
    """Fill the repository placeholders of a model-card template.

    The full ``YOUR_HF_USERNAME/WorldDisasterLM-8B`` placeholder is replaced
    with ``repo_id`` so the card points at the repository that was actually
    pushed, whatever it is named. Any remaining bare ``YOUR_HF_USERNAME`` is
    replaced with the namespace part of ``repo_id``; it is left untouched when
    ``repo_id`` has no namespace.
    """
    card = template.replace("YOUR_HF_USERNAME/WorldDisasterLM-8B", repo_id)
    namespace, sep, _name = repo_id.partition("/")
    if sep:
        card = card.replace("YOUR_HF_USERNAME", namespace)
    return card


def merge_and_push(adapter_path: str, base_model_id: str, repo_id: str, private: bool, push_dtype: str) -> None:
    """Merge a LoRA adapter into its base model and publish the result.

    Steps: create (or reuse) the Hub repo, load the tokenizer from the adapter
    directory, load the base model, apply and merge the adapter, push the
    merged weights and tokenizer, then upload the rendered model card as
    ``README.md``.

    Args:
        adapter_path: Directory holding the LoRA adapter; the tokenizer is
            loaded from the same directory.
        base_model_id: Model ID or path of the base model.
        repo_id: Target Hub repository.
        private: Whether to create the repository as private.
        push_dtype: One of ``"bfloat16"``, ``"float16"``, ``"float32"``; the
            dtype the base model is loaded in.

    Raises:
        SystemExit: If the ``HF_TOKEN`` environment variable is unset or empty.
        KeyError: If ``push_dtype`` is not one of the supported names.
    """
    # Check the token before the heavy torch/transformers imports so a
    # misconfigured environment fails immediately.
    token = os.getenv("HF_TOKEN")
    if not token:
        raise SystemExit("HF_TOKEN environment variable not set. Run: export HF_TOKEN=hf_xxxx")

    import torch
    from huggingface_hub import HfApi, create_repo
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    dtype_map = {
        "bfloat16": torch.bfloat16,
        "float16": torch.float16,
        "float32": torch.float32,
    }
    torch_dtype = dtype_map[push_dtype]

    api = HfApi(token=token)

    logger.info("Creating or verifying repo: %s", repo_id)
    repo_url = create_repo(repo_id=repo_id, token=token, private=private, repo_type="model", exist_ok=True)
    # create_repo returns a RepoUrl; its repo_id attribute carries the
    # namespace even when the caller passed a bare repository name. Fall back
    # to the given id if the attribute is unavailable.
    full_repo_id = getattr(repo_url, "repo_id", None) or repo_id

    # NOTE(review): trust_remote_code=True lets the loaded repo/checkpoint run
    # its own Python code at load time; only use with sources you trust.
    logger.info("Loading tokenizer from adapter path: %s", adapter_path)
    tokenizer = AutoTokenizer.from_pretrained(adapter_path, trust_remote_code=True)

    logger.info("Loading base model: %s", base_model_id)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch_dtype,
        device_map="auto",
        trust_remote_code=True,
    )

    logger.info("Loading LoRA adapter from: %s", adapter_path)
    peft_model = PeftModel.from_pretrained(base_model, adapter_path)

    logger.info("Merging LoRA weights into base model …")
    merged_model = peft_model.merge_and_unload()
    merged_model.config.use_cache = True

    logger.info("Pushing merged model to %s …", repo_id)
    merged_model.push_to_hub(repo_id, token=token, safe_serialization=True)
    tokenizer.push_to_hub(repo_id, token=token)

    # Upload the model card straight from memory: no temporary file to leak
    # if the upload fails.
    card_text = _render_model_card(HF_MODEL_CARD, full_repo_id)
    api.upload_file(
        path_or_fileobj=card_text.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=repo_id,
        repo_type="model",
        token=token,
    )

    logger.info("Done! Model published at: https://huggingface.co/%s", full_repo_id)
    logger.info("Tag your model as free-to-use by setting the license in the repo settings.")


def main() -> None:
    """Script entry point: set up logging, read the CLI options, run the push."""
    log_format = "%(asctime)s | %(levelname)s | %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)

    cli = parse_args()
    # Positional order follows merge_and_push's signature:
    # adapter_path, base_model_id, repo_id, private, push_dtype.
    merge_and_push(
        cli.adapter,
        cli.base_model,
        cli.repo_id,
        cli.private,
        cli.push_dtype,
    )


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()