File size: 902 Bytes
ff54a63
 
5e3bca6
ff54a63
 
 
 
5e3bca6
 
 
 
 
fe276b5
5e3bca6
 
ff54a63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e3bca6
ff54a63
 
 
 
 
5e3bca6
 
fe276b5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import shutil
from datetime import datetime

from src.utils.constants import TEMP_FOLDER


def get_timestamp():
    """
    Build a compact timestamp string for the current local time.

    The result follows the pattern "%Y%m%d-%H%M%S": an eight-digit date,
    a hyphen, then a six-digit time (for example "20240131-235959").
    """
    pattern = "%Y%m%d-%H%M%S"
    current = datetime.now()
    return current.strftime(pattern)


def extract_corpus(fileobj):
    """
    Read a file and return its lines together with the source file name.

    The source file is copied to a temporary file whose name is built from
    TEMP_FOLDER, the current timestamp and the source's base name. The copy
    is read line by line and then removed, even if copying or reading fails.

    Parameters
    ----------
    fileobj : File-like object or path
        Either an object that exposes the path of the file on disk through
        a ``name`` attribute (such as an open file), or the path itself
        (``str`` or ``os.PathLike``).

    Returns
    -------
    lines : List of str
        The contents of the file as a list of strings, as produced by
        ``readlines`` (line endings are kept).
    name : str
        ``fileobj.name`` when that attribute exists, otherwise ``fileobj``
        itself.
    """
    # Resolve the on-disk path once. Passing the file object itself to
    # os.path.basename raises TypeError for ordinary file objects, so the
    # base name must be derived from the path, not from the object.
    source = getattr(fileobj, "name", fileobj)

    # TEMP_FOLDER is used as a plain string prefix here.
    # NOTE(review): presumably it ends with a path separator - confirm in
    # src.utils.constants.
    path = TEMP_FOLDER + get_timestamp() + os.path.basename(source)

    try:
        shutil.copyfile(source, path)

        # No explicit encoding: the platform default text encoding is used.
        with open(path, "r") as f:
            lines = f.readlines()
    finally:
        # Always clean up the temporary copy. It may not exist if the copy
        # itself failed, which must not mask the original error.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    return lines, source