File size: 902 Bytes
ff54a63 5e3bca6 ff54a63 5e3bca6 fe276b5 5e3bca6 ff54a63 5e3bca6 ff54a63 5e3bca6 fe276b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import os
import shutil
from datetime import datetime
from src.utils.constants import TEMP_FOLDER
def get_timestamp():
    """
    Build a string timestamp for the current local time.

    Returns
    -------
    stamp : str
        The value of ``datetime.now()`` rendered with the format
        "%Y%m%d-%H%M%S" (date and time parts joined by a hyphen).
    """
    stamp_format = "%Y%m%d-%H%M%S"
    current = datetime.now()
    return current.strftime(stamp_format)
def extract_corpus(fileobj):
    """
    Read a file and return its lines together with its path.

    The file is copied into TEMP_FOLDER (created if missing) under a
    timestamp-prefixed name, the copy is read line by line, and the copy is
    removed again before returning, even if reading fails.

    Parameters
    ----------
    fileobj : File-like object, str or os.PathLike
        The file to read. Either an object exposing its on-disk path through
        a ``name`` attribute, or the path itself.

    Returns
    -------
    lines : List of str
        The contents of the file, one entry per line (line endings kept).
    name : str
        The path of the source file (``fileobj.name`` when that attribute
        exists, otherwise ``fileobj`` itself).

    Raises
    ------
    OSError
        If the file cannot be copied or read.
    """
    # os.path.basename() needs a path, not a file object, so resolve the
    # path first; fall back to fileobj itself when it already is a path.
    source = getattr(fileobj, "name", fileobj)

    # Join instead of concatenating so the result does not depend on
    # TEMP_FOLDER ending with a path separator.
    os.makedirs(TEMP_FOLDER, exist_ok=True)
    path = os.path.join(
        TEMP_FOLDER, get_timestamp() + os.path.basename(source)
    )

    shutil.copyfile(source, path)
    try:
        with open(path, "r") as f:
            lines = f.readlines()
    finally:
        # Remove the temporary copy even when reading raises.
        os.remove(path)
    return lines, source
|