import os

import numpy as np

from source.text import save_text_file
from source.embedding import text_to_embedding


def generate_unique_id(directory="raw_text"):
    """Return the next sequential ID for *directory* as a zero-padded string.

    The directory is created if it does not exist. The ID is one more than
    the larger of (a) the number of regular files in the directory and
    (b) the highest purely numeric file stem found there (e.g. ``"07"`` in
    ``07.txt``). Taking the highest existing stem into account prevents
    handing out an ID that is already in use after files were deleted,
    while giving the same result as a plain file count when the numbering
    is contiguous.

    Args:
        directory: Directory whose files determine the next ID.

    Returns:
        The ID formatted with at least two digits ("01", "02", ..., "100").
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    file_names = [
        name
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    ]

    # Highest numeric stem already present; non-numeric names are ignored here
    # but still contribute to the file count below.
    highest_used = 0
    for name in file_names:
        stem = os.path.splitext(name)[0]
        if stem.isdecimal():
            highest_used = max(highest_used, int(stem))

    next_id = max(len(file_names), highest_used) + 1

    # Minimum width of two digits; larger numbers simply use more digits.
    return f"{next_id:02d}"


def process_mentor_profile(profile_text):
    """Store a mentor profile as raw text plus its embedding and return its ID.

    The text is written to ``raw_text/<id>.txt`` via ``save_text_file`` and the
    embedding returned by ``text_to_embedding`` is written to
    ``embedded_text/<id>.npy`` with ``numpy.save``. Both files share the same
    sequential ID, which is derived from the contents of ``raw_text``.

    Args:
        profile_text: The profile text to persist and embed.

    Returns:
        The sequential ID (as a string) assigned to the profile.
    """
    raw_text_directory = "raw_text"
    embedding_directory = "embedded_text"

    # Make sure both output directories exist before anything is written.
    os.makedirs(raw_text_directory, exist_ok=True)
    os.makedirs(embedding_directory, exist_ok=True)

    # Derive the ID from the same directory the text file is written to.
    profile_id = generate_unique_id(raw_text_directory)

    # Save profile text to a .txt file named after the sequential ID.
    txt_filename = f"{profile_id}.txt"
    save_text_file(txt_filename, profile_text, directory=raw_text_directory)

    # Generate the embedding from the text.
    # NOTE(review): assumes text_to_embedding returns something numpy.save can
    # serialize (e.g. an array) - confirm against source.embedding.
    embedding = text_to_embedding(profile_text)

    # Save the embedding to a .npy file using the same sequential ID.
    npy_filename = f"{profile_id}.npy"
    np.save(os.path.join(embedding_directory, npy_filename), embedding)

    return profile_id