# Extraction metadata (not part of the script): file size 1,305 bytes, revision 6d12932.
"""
Upload FONS Knowledge Base to Hugging Face Dataset
===================================================
This script uploads the large 61MB knowledge file to a HF Dataset,
so it can be loaded at runtime in the Space without hitting size limits.
"""
from datasets import Dataset
from huggingface_hub import HfApi
import json
import os
# Configuration
# Local JSON-Lines file holding the knowledge-base records (~61 MB per the module docstring).
KNOWLEDGE_FILE = "fons_knowledge_google.jsonl"
# Target Hugging Face dataset repo ("namespace/name") that main() pushes to.
DATASET_REPO = "NurseCitizenDeveloper/fons-knowledge-base"
def load_jsonl(filepath):
    """Load a JSON-Lines file into a list of dicts.

    Blank lines are ignored and malformed lines are skipped (best-effort
    load, preserving the original behavior), but skipped lines are now
    counted and reported so silent data loss is visible.

    Args:
        filepath: Path to the JSONL file to read.

    Returns:
        List of parsed records, one per valid line, in file order.
    """
    data = []
    skipped = 0
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank/whitespace-only lines are not records.
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError:
                # Narrow catch: one malformed line should not abort the load,
                # but anything other than a parse error should surface.
                skipped += 1
    if skipped:
        print(f"Warning: skipped {skipped} malformed line(s) in {filepath}")
    return data
def main():
    """Load the local JSONL knowledge file and push it to the HF Hub as a Dataset.

    Fails fast with a clear message if the input file is missing; otherwise
    parses the records, builds an in-memory `datasets.Dataset`, and uploads
    it publicly to DATASET_REPO.
    """
    # Fail fast before doing any work if the knowledge file isn't present.
    if not os.path.exists(KNOWLEDGE_FILE):
        raise SystemExit(f"Knowledge file not found: {KNOWLEDGE_FILE}")

    # NOTE(review): the original status messages contained mojibake-garbled
    # emoji (mis-decoded UTF-8) that also broke two prints across lines;
    # replaced with plain-ASCII equivalents.
    print(f"Loading {KNOWLEDGE_FILE}...")
    records = load_jsonl(KNOWLEDGE_FILE)
    print(f"Loaded {len(records)} records")

    # Build an in-memory HF Dataset from the list of record dicts.
    dataset = Dataset.from_list(records)
    print(f"Dataset: {dataset}")

    # Push to the Hub; private=False makes the dataset publicly readable,
    # which is required for the Space to load it at runtime without a token.
    print(f"Uploading to {DATASET_REPO}...")
    dataset.push_to_hub(DATASET_REPO, private=False)
    print(f"Upload complete! View at: https://huggingface.co/datasets/{DATASET_REPO}")


if __name__ == "__main__":
    main()