File size: 1,634 Bytes
dddf3f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
from source.text import save_text_file
from source.embedding import text_to_embedding
import numpy as np

def generate_unique_id(directory="raw_text"):
    """Return the next sequential ID for *directory* as a zero-padded string.

    The directory is created if it does not exist yet. The ID is one more
    than the larger of (a) the number of regular files in the directory and
    (b) the highest purely numeric file stem already present. For a
    directory holding ``01.*`` .. ``NN.*`` this equals "file count + 1";
    taking the highest existing stem into account additionally guarantees
    that an ID still in use is never handed out again after some files
    have been deleted (e.g. ``01.txt`` and ``03.txt`` yield ``"04"``, not
    ``"03"``).

    Args:
        directory: Path of the directory whose files determine the next ID.

    Returns:
        The next ID formatted with at least two digits, e.g. ``"01"``,
        ``"02"``, ..., ``"100"``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory, exist_ok=True)

    file_count = 0
    highest_id = 0
    for entry in os.listdir(directory):
        # Only regular files count; sub-directories are ignored.
        if not os.path.isfile(os.path.join(directory, entry)):
            continue
        file_count += 1
        stem = os.path.splitext(entry)[0]
        if stem.isdecimal():
            highest_id = max(highest_id, int(stem))

    next_id = max(file_count, highest_id) + 1

    # Two-digit minimum width; larger numbers simply use more digits.
    return f"{next_id:02d}"

def process_mentor_profile(profile_text):
    """Store a mentor profile as raw text plus its embedding.

    A sequential ID is obtained from ``generate_unique_id`` for the raw-text
    directory. The text is written to ``raw_text/<id>.txt`` through
    ``save_text_file`` and the result of ``text_to_embedding`` is written to
    ``embedded_text/<id>.npy`` with ``numpy.save``. Both paths are relative
    to the current working directory.

    Args:
        profile_text: The profile text to store and embed.

    Returns:
        The ID string shared by the ``.txt`` and ``.npy`` files.
    """
    raw_text_directory = "raw_text"
    embedding_directory = "embedded_text"

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(raw_text_directory, exist_ok=True)
    os.makedirs(embedding_directory, exist_ok=True)

    # Pass the directory explicitly so the ID is always derived from the
    # same location the text file is written to.
    profile_id = generate_unique_id(raw_text_directory)

    # Save profile text to a .txt file named after the sequential ID.
    txt_filename = f"{profile_id}.txt"
    save_text_file(txt_filename, profile_text, directory=raw_text_directory)

    # Generate the embedding and store it under the same ID.
    embedding = text_to_embedding(profile_text)
    npy_filename = f"{profile_id}.npy"
    np.save(os.path.join(embedding_directory, npy_filename), embedding)

    return profile_id