# Bertopic/src/utils/utils.py
# Dopler47's picture
# Optimized GPU utilization
# fe276b5
import os
import shutil
from datetime import datetime
from src.utils.constants import TEMP_FOLDER
def get_timestamp() -> str:
    """
    Return the current timestamp as a string in the format
    "%Y%m%d-%H%M%S" (for example "20240131-235959").

    The date and time parts are separated by a hyphen. The value comes from
    ``datetime.now()`` called without a timezone argument.

    Returns
    -------
    timestamp : str
        The formatted current date and time.
    """
    return datetime.now().strftime("%Y%m%d-%H%M%S")
def extract_corpus(fileobj):
    """
    Read a file and return its lines together with the file's name.

    Copies the file to a temporary location on disk (TEMP_FOLDER, under a
    timestamp-prefixed name), reads the copy line by line into a list, and
    removes the copy again, even if reading fails.

    Parameters
    ----------
    fileobj : File-like object
        The file to read. Only its ``name`` attribute is used, which must be
        the path of the file on disk.

    Returns
    -------
    lines : List of str
        The contents of the file as returned by ``readlines()`` (line
        endings are kept).
    name : str
        ``fileobj.name``, unchanged.
    """
    source_path = fileobj.name
    # Derive the temp file name from the path, not from the object itself:
    # os.path.basename() only accepts str, bytes or os.PathLike values.
    # NOTE(review): TEMP_FOLDER is concatenated as-is, so it presumably ends
    # with a path separator - confirm against src.utils.constants.
    path = TEMP_FOLDER + get_timestamp() + os.path.basename(source_path)
    shutil.copyfile(source_path, path)
    try:
        with open(path, "r") as f:
            lines = f.readlines()
    finally:
        # Always clean up the temporary copy, including on read/decode errors.
        os.remove(path)
    return lines, source_path