# English
# SyvaAI-Bv1 / create.py
# danielgrims's picture
# Upload folder using huggingface_hub
# 1026698 verified
import os
INPUT_FOLDER = "/home/ubuntu/output"  # path to your processed wiki dump folder
OUTPUT_FILE = "wiki_texts.txt"


def build_corpus(input_folder=INPUT_FOLDER, output_file=OUTPUT_FILE):
    """Merge every file under *input_folder* into one line-per-record text file.

    The folder is walked recursively. Each input line is stripped of
    surrounding whitespace; lines that are empty after stripping are dropped,
    and every remaining line is written to *output_file* followed by a
    single newline.

    Directories and files are visited in sorted name order so the output is
    reproducible from run to run.

    Args:
        input_folder: Root directory to read from. If it does not exist,
            ``os.walk`` yields nothing and an empty output file is produced.
        output_file: Path of the file to create (overwritten if present).

    Returns:
        The number of lines written to *output_file*.

    Raises:
        OSError: If the output file or an input file cannot be opened.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    written = 0
    with open(output_file, "w", encoding="utf-8") as out_f:
        # Resolved once so the output file can be recognised (and skipped) if
        # it happens to live somewhere inside the folder being walked.
        output_real = os.path.realpath(output_file)

        # walk through all directories and files recursively
        for root, dirs, files in os.walk(input_folder):
            # In-place sort: os.walk descends into `dirs` in the order left
            # here, which would otherwise be arbitrary listdir order.
            dirs.sort()
            for filename in sorted(files):
                filepath = os.path.join(root, filename)
                if os.path.realpath(filepath) == output_real:
                    # Never read the file we are currently writing.
                    continue
                with open(filepath, "r", encoding="utf-8") as f:
                    for line in f:
                        # Iterating a text-mode file yields at most one
                        # trailing newline per line, so strip() alone is
                        # enough to normalise it.
                        text = line.strip()
                        if text:  # skip empty lines
                            out_f.write(text + "\n")
                            written += 1
    return written


def main():
    """Build the corpus from the module-level defaults and report success."""
    build_corpus(INPUT_FOLDER, OUTPUT_FILE)
    print(f"✅ {OUTPUT_FILE} created with all documents from {INPUT_FOLDER}!")


if __name__ == "__main__":
    main()