Spaces:
Runtime error
Runtime error
| import os | |
| from source.text import save_text_file | |
| from source.embedding import text_to_embedding | |
| import numpy as np | |
def generate_unique_id(directory="raw_text"):
    """Return the next sequential, zero-padded ID for files in *directory*.

    The directory is created if it does not exist yet. The ID is one more
    than the highest numeric file stem already present (``"03.txt"`` -> 3).
    Deriving it from the file *count* would hand out an ID that is still in
    use as soon as an earlier file has been deleted (``01.txt`` + ``03.txt``
    would yield ``"03"`` again). Files whose stem is not a plain decimal
    number are ignored.

    Args:
        directory: Folder whose regular files determine the next ID.

    Returns:
        The ID as a string padded to at least two digits, e.g. ``"01"``;
        IDs above 99 simply use more digits (``"100"``).
    """
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(directory, exist_ok=True)

    highest = 0
    for entry in os.listdir(directory):
        # Only regular files carry IDs; sub-directories are skipped.
        if not os.path.isfile(os.path.join(directory, entry)):
            continue
        stem = os.path.splitext(entry)[0]
        if stem.isdecimal():
            highest = max(highest, int(stem))

    return f"{highest + 1:02d}"
def process_mentor_profile(profile_text, raw_text_directory="raw_text",
                           embedding_directory="embedded_text"):
    """Persist a mentor profile as a text file plus its embedding.

    The profile receives a sequential ID from ``generate_unique_id``. The
    text is written through ``save_text_file`` as ``<id>.txt`` in
    *raw_text_directory*, and the result of ``text_to_embedding`` is stored
    with ``np.save`` as ``<id>.npy`` in *embedding_directory*.

    Args:
        profile_text: Profile content; passed unchanged to ``save_text_file``
            and ``text_to_embedding``.
        raw_text_directory: Folder for the ``.txt`` files. It is also the
            folder the ID is derived from, so ID and storage cannot drift
            apart.
        embedding_directory: Folder for the ``.npy`` embedding files.

    Returns:
        The ID string shared by both saved files.
    """
    # exist_ok makes creation safe even if another process creates the
    # directory between a check and the call.
    os.makedirs(raw_text_directory, exist_ok=True)
    os.makedirs(embedding_directory, exist_ok=True)

    # Derive the ID from the same directory the text file is written to.
    profile_id = generate_unique_id(raw_text_directory)

    save_text_file(f"{profile_id}.txt", profile_text, directory=raw_text_directory)

    embedding = text_to_embedding(profile_text)
    np.save(os.path.join(embedding_directory, f"{profile_id}.npy"), embedding)

    return profile_id