import os
from collections import OrderedDict
from datetime import datetime
from typing import Optional

import pandas as pd


def read_text_file(file_path):
    """Return the entire content of the text file at *file_path* as a string."""
    with open(file_path, 'r') as file:
        return file.read()


# FILTER FUNCTION
def filter_profiles_by_input(profiles, data_dictionary):
    """Interactively narrow *profiles* down, one column filter at a time.

    The user is prompted (via ``input``) for a column name and a value; a
    profile is kept when the value occurs, case-insensitively, as a substring
    of ``str(profile.get_attributes().get(column, ""))``. Each column can be
    used for at most one successful filter. A filter that would leave no
    profiles is discarded and the previous result is kept.

    Args:
        profiles: Iterable of objects exposing ``get_attributes()`` that
            returns a mapping of column name to value.
        data_dictionary: Object exposing ``get_columns()`` that returns the
            filterable column names.

    Returns:
        The filtered list of profiles (the original ``profiles`` object if no
        filter was applied).

    Raises:
        RuntimeError: If ``get_columns()`` fails or returns no columns.
    """
    print("\n=== FILTER SETTINGS ===")

    # Only the call that can actually fail lives inside the try, so the
    # "no columns" error below is not re-wrapped with a misleading prefix.
    try:
        dd_columns = data_dictionary.get_columns()
    except Exception as e:
        raise RuntimeError(
            f"Failed to retrieve columns from data dictionary: {str(e)}"
        ) from e
    if not dd_columns:
        raise RuntimeError("Data dictionary returned no columns.")

    print("Available columns:")
    for col in sorted(dd_columns):
        print(f" • {col}")

    remaining_columns = set(dd_columns)
    filtered_profiles = profiles

    while True:
        if not remaining_columns:
            print("\nNo more columns available for filtering.")
            break

        print("\nColumns available to filter on:")
        for col in sorted(remaining_columns):
            print(f" • {col}")

        column = input("\nEnter column name to filter (press Enter to finish): ").strip()
        if not column:
            break  # Stop filtering when user presses Enter

        if column not in remaining_columns:
            print(f"\nError: Column '{column}' not found or already used for filtering.")
            continue

        value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()
        if not value:
            print("\nNo value entered. Skipping this filter.")
            continue

        needle = value.lower()  # loop-invariant, so lower-case it once
        new_filtered_profiles = [
            profile
            for profile in filtered_profiles
            if needle in str(profile.get_attributes().get(column, "")).lower()
        ]

        if not new_filtered_profiles:
            # Keep the previous result; the column stays available for a retry.
            print(
                f"\nNo matches for '{column}' containing '{value}'. "
                "Returning to previous state."
            )
            continue

        filtered_profiles = new_filtered_profiles
        remaining_columns.remove(column)

        print(f"\nFound {len(filtered_profiles)} matching profiles")
        print(f"Profiles filtered out: {len(profiles) - len(filtered_profiles)}")

        confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
        while confirm not in ['yes', 'no']:
            confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()
        if confirm == 'no':
            break

    return filtered_profiles


def generate_file_excerpt(file_path, pattern, max_chars=5000):
    """Return the tail of the text formed by the lines that start with *pattern*.

    A line is kept when it starts with *pattern* and has at least six
    whitespace-separated tokens (the count includes the pattern itself).
    Kept lines have *pattern* removed, are stripped, and are joined with
    newlines.

    Args:
        file_path: Path of the text file to read.
        pattern: Prefix a line must start with to be extracted.
        max_chars: Maximum number of characters to return, taken from the
            END of the joined text. Non-positive values yield ``""``.

    Returns:
        The last *max_chars* characters of the joined text.
    """
    # Step 1: Read the file content
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Step 2: Extract lines starting with "pattern"
    # NOTE(review): str.replace removes EVERY occurrence of pattern in the
    # line, not just the leading one - confirm this is intended.
    extracted_lines = [
        line.replace(pattern, '').strip()
        for line in lines
        if line.startswith(pattern) and len(line.split()) >= 6
    ]

    # Step 3: Join all extracted lines into a single string
    full_text = '\n'.join(extracted_lines)

    # Step 4: Return the last max_chars characters. The guard is required
    # because full_text[-0:] is the whole string, not an empty one.
    if max_chars <= 0:
        return ""
    return full_text[-max_chars:]


def generate_dict_from_file(file_name, column_name1, column_name2):
    """Build an ordered mapping from two columns of an Excel sheet.

    Args:
        file_name: Path of the Excel workbook (read with the openpyxl engine).
        column_name1: Column whose values become the keys.
        column_name2: Column whose values become the values.

    Returns:
        An ``OrderedDict`` in row order; when a key occurs more than once the
        value from the later row wins.
    """
    df = pd.read_excel(
        file_name,
        usecols=[column_name1, column_name2],
        engine='openpyxl',  # Specify the engine
    )
    return OrderedDict(zip(df[column_name1], df[column_name2]))


def find_latest_timestamped_file(directory, filename_pattern) -> Optional[str]:
    """Find the most recently modified file whose name ends with a suffix.

    Args:
        directory: The directory to search for files.
        filename_pattern: Suffix the filename must end with
            (e.g., "interview_results.xlsx").

    Returns:
        The path to the matching entry with the newest modification time,
        or None if no matching files were found.
    """
    files = [f for f in os.listdir(directory) if f.endswith(filename_pattern)]
    if not files:
        print(f"Unable to find file with {filename_pattern} in {directory}")
        return None

    # max() picks the newest entry without sorting the whole list.
    latest_file = max(
        files, key=lambda f: os.path.getmtime(os.path.join(directory, f))
    )
    return os.path.join(directory, latest_file)


def generate_pivot_table(original_table, index, columns, values):
    """Pivot the entries of all reports into one wide summary DataFrame.

    Args:
        original_table: Iterable of report objects, each with an ``Entries``
            iterable whose items expose ``dict()`` (presumably pydantic
            models - TODO confirm).
        index: Field(s) that identify a row of the summary.
        columns: Name of the single field whose distinct values become the
            summary's columns; their first-seen order is preserved.
        values: Field that supplies the cell values.

    Returns:
        A DataFrame with ``index`` restored as regular column(s) and missing
        cells filled with "No Response".
    """
    # Step 1: Flatten all entry objects into a DataFrame
    df = pd.json_normalize(
        entry.dict() for report in original_table for entry in report.Entries
    )

    # Step 2: Extract the original order of 'columns' (e.g., questions)
    original_order = df[columns].drop_duplicates().tolist()

    # Step 3: Pivot the DataFrame
    summary_df = df.pivot(index=index, columns=columns, values=values)

    # Step 4: Reindex to preserve the original order of columns (pivot sorts
    # them), then flatten the index and fill the gaps
    summary_df = (
        summary_df.reindex(columns=original_order)
        .reset_index()
        .fillna("No Response")
    )

    return summary_df