import os
import shutil
from datetime import datetime

from src.utils.constants import TEMP_FOLDER


def get_timestamp():
    """
    Return the current local time as a string in the format "%Y%m%d-%H%M%S".
    """
    return datetime.now().strftime("%Y%m%d-%H%M%S")


def extract_corpus(fileobj):
    """
    Read a file object and return its lines together with its original path.

    The file referenced by ``fileobj.name`` is copied to a temporary path
    built from ``TEMP_FOLDER``, the current timestamp and the file's base
    name. The copy is read line by line into a list and then deleted.

    Parameters
    ----------
    fileobj : File-like object
        Object whose ``name`` attribute is the path of the file to read.

    Returns
    -------
    lines : List of str
        The contents of the file, one entry per line (line endings kept).
    name : str
        The value of ``fileobj.name``, i.e. the path the data was read from.
    """
    source_path = fileobj.name
    # Use the path of the file object, not the object itself:
    # os.path.basename() only accepts str, bytes or os.PathLike values.
    # NOTE(review): TEMP_FOLDER is concatenated as-is, so it presumably ends
    # with a path separator - confirm against src.utils.constants.
    path = TEMP_FOLDER + get_timestamp() + os.path.basename(source_path)
    shutil.copyfile(source_path, path)
    try:
        with open(path, "r") as f:
            lines = f.readlines()
    finally:
        # Always delete the temporary copy, even when reading fails.
        os.remove(path)
    return lines, source_path