"""Check whether required Nomen-AI Hub artifacts exist. Run before DPO/evaluation. It checks that dataset repos load and that adapter repos contain PEFT adapter weight files. """ from huggingface_hub import HfApi from datasets import load_dataset SFT_DATASET = 'krystv/nomen-ai-sft' DPO_DATASET = 'krystv/nomen-ai-dpo' SFT_ADAPTER = 'krystv/nomen-ai-sft-lora' DPO_ADAPTER = 'krystv/nomen-ai-dpo-lora' def repo_files(repo_id: str): info = HfApi().repo_info(repo_id=repo_id, repo_type='model') return [s.rfilename for s in info.siblings] def has_adapter_weights(repo_id: str) -> bool: files = repo_files(repo_id) return any(name.endswith(('adapter_model.safetensors', 'adapter_model.bin')) for name in files) def main(): print('Checking datasets...') sft = load_dataset(SFT_DATASET, split='train[:1]') dpo = load_dataset(DPO_DATASET, split='train[:1]') print('SFT columns:', sft.column_names) print('DPO columns:', dpo.column_names) print('Checking adapters...') sft_ready = has_adapter_weights(SFT_ADAPTER) dpo_ready = has_adapter_weights(DPO_ADAPTER) print(f'SFT adapter weights present: {sft_ready}') print(f'DPO adapter weights present: {dpo_ready}') if not sft_ready: print('ACTION: run scripts/train_sft.py before DPO/evaluation.') if sft_ready and not dpo_ready: print('ACTION: run scripts/train_dpo.py to create the DPO adapter.') if sft_ready and dpo_ready: print('ALL_ARTIFACTS_READY') if __name__ == '__main__': main()