File size: 679 Bytes
a8639ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import os
import glob
from tqdm import tqdm
# Concatenate every ``*.py`` file found directly inside ``folder`` into a single
# corpus file, writing a ``# <FILESEP>`` marker line before each file's contents.
folder = os.path.expanduser("~/torch_datasets/github-python/mega_licensed_all_files")
output_file = os.path.expanduser(
    "~/torch_datasets/github-python/mega_licensed_corpus/concatenated.py"
)

# open(..., "w") does not create missing parent directories, so make sure the
# corpus directory exists before the output file is opened.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes the concatenated corpus reproducible from one run to the next.
source_files = sorted(glob.glob(os.path.join(folder, "*.py")))

with open(output_file, "w", encoding="utf-8") as out_f:
    for file in tqdm(source_files):
        out_f.write("\n# <FILESEP>\n")
        try:
            # errors="ignore" drops undecodable bytes instead of raising, so
            # the failures expected here are OS-level ones (permissions, a
            # directory whose name ends in ".py", a file removed mid-run, ...).
            with open(file, "r", encoding="utf-8", errors="ignore") as in_f:
                contents = in_f.read()
        except OSError as e:
            # Best effort: leave a note in the corpus and move on to the next file.
            out_f.write(f"\n# Skipping {file} due to error: {e}\n")
        else:
            # Written outside the try so that a failure to write the corpus
            # itself is not misreported as a problem with the input file.
            out_f.write(contents)

print(f"Concatenation complete: {output_file}")
|