Spaces:
Build error
Build error
| from collections import OrderedDict | |
| from datetime import datetime | |
| import pandas as pd | |
| import os | |
def read_text_file(file_path):
    """Return the entire content of the text file at *file_path* as one string.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(file_path, 'r') as handle:
        return handle.read()
| # FILTER FUNCTION | |
def filter_profiles_by_input(profiles, data_dictionary):
    """Interactively narrow *profiles* one column at a time.

    The user is prompted on stdin for a column name (taken from the data
    dictionary) and a value. A profile is kept when the value occurs, ignoring
    case, anywhere in the string form of that column's attribute; a missing
    attribute is treated as an empty string. A column that produced a
    successful filter is not offered again. A filter that would leave no
    profiles is discarded and the previous result is kept.

    Args:
        profiles: Sequence of objects exposing ``get_attributes()``, which
            must return a mapping supporting ``.get(column, default)``.
        data_dictionary: Object exposing ``get_columns()``, which must return
            a non-empty iterable of column names.

    Returns:
        The list of profiles matching every applied filter, or *profiles*
        itself when no filter was applied.

    Raises:
        RuntimeError: If ``get_columns()`` fails (the original exception is
            attached as ``__cause__``) or returns no columns.
    """
    print("\n=== FILTER SETTINGS ===")

    # Keep the try body down to the single call that can fail, so that the
    # validation error below is not caught and re-wrapped by this handler.
    try:
        dd_columns = data_dictionary.get_columns()
    except Exception as exc:
        raise RuntimeError(
            f"Failed to retrieve columns from data dictionary: {exc}"
        ) from exc
    if not dd_columns:
        raise RuntimeError("Data dictionary returned no columns.")

    print("Available columns:")
    for col in sorted(dd_columns):
        print(f" • {col}")

    remaining_columns = set(dd_columns)
    filtered_profiles = profiles

    while True:
        if not remaining_columns:
            print("\nNo more columns available for filtering.")
            break

        print("\nColumns available to filter on:")
        for col in sorted(remaining_columns):
            print(f" • {col}")

        column = input("\nEnter column name to filter (press Enter to finish): ").strip()
        if not column:
            break  # An empty answer ends the filtering session.
        if column not in remaining_columns:
            print(f"\nError: Column '{column}' not found or already used for filtering.")
            continue

        value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()
        if not value:
            print("\nNo value entered. Skipping this filter.")
            continue

        # Lower-case the search term once instead of once per profile.
        needle = value.lower()
        new_filtered_profiles = [
            profile
            for profile in filtered_profiles
            if needle in str(profile.get_attributes().get(column, "")).lower()
        ]
        if not new_filtered_profiles:
            # Do not commit a filter that would leave nothing to work with.
            print(f"\nNo matches for '{column}' containing '{value}'. Returning to previous state.")
            continue

        filtered_profiles = new_filtered_profiles
        remaining_columns.remove(column)
        print(f"\nFound {len(filtered_profiles)} matching profiles")
        # Reported relative to the original input, not the previous step.
        print(f"Profiles filtered out: {len(profiles) - len(filtered_profiles)}")

        confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
        while confirm not in ('yes', 'no'):
            confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()
        if confirm == 'no':
            break

    return filtered_profiles
def generate_file_excerpt(file_path, pattern, max_chars=5000):
    """Return the tail of the text gathered from lines that start with *pattern*.

    A line is selected when it starts with *pattern* and contains at least six
    whitespace-separated tokens (the count includes the pattern's own tokens).
    Every occurrence of *pattern* is removed from a selected line, the line is
    stripped, and the results are joined with newlines.

    Args:
        file_path: Path of the text file to scan.
        pattern: Literal prefix a line must start with to be selected.
        max_chars: Maximum number of characters to return, counted from the
            END of the joined text. A value of zero or less yields ''.

    Returns:
        The last *max_chars* characters of the joined text.
    """
    with open(file_path, 'r') as file:
        extracted_lines = [
            line.replace(pattern, '').strip()
            for line in file
            if line.startswith(pattern) and len(line.split()) >= 6
        ]

    full_text = '\n'.join(extracted_lines)

    # text[-0:] is the whole string and a negative budget would slice from the
    # front, so a non-positive budget is handled explicitly.
    if max_chars <= 0:
        return ''
    return full_text[-max_chars:]
def generate_dict_from_file(file_name, column_name1, column_name2):
    """Build an ordered mapping from two columns of an Excel sheet.

    Only *column_name1* and *column_name2* are loaded (via the openpyxl
    engine). Values of the first column become keys, values of the second
    become the mapped values, in row order; a repeated key keeps its first
    position and takes the value of its last row.

    Returns:
        An ``OrderedDict`` keyed by the entries of *column_name1*.
    """
    frame = pd.read_excel(
        file_name,
        usecols=[column_name1, column_name2],
        engine='openpyxl',
    )
    keys = frame[column_name1]
    answers = frame[column_name2]

    mapping = OrderedDict()
    for key, answer in zip(keys, answers):
        mapping[key] = answer
    return mapping
def find_latest_timestamped_file(directory, filename_pattern):
    """Return the most recently modified file in *directory* with a given name suffix.

    "Latest" is decided by the file system modification time
    (``os.path.getmtime``), not by any timestamp embedded in the file name.

    Args:
        directory: The directory to search (not recursive).
        filename_pattern: Literal suffix the file name must end with
            (e.g., "interview_results.xlsx"). This is a plain ``endswith``
            match, not a glob or regular expression.

    Returns:
        The path to the matching file with the newest modification time, or
        None (after printing a notice) if no matching file was found.
    """
    candidates = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith(filename_pattern)
    ]
    # os.listdir also reports sub-directories; only regular files qualify.
    candidates = [path for path in candidates if os.path.isfile(path)]

    if not candidates:
        print(f"Unable to find file with {filename_pattern} in {directory}")
        return None

    # max() picks the newest entry in one pass; no need to sort the whole list.
    return max(candidates, key=os.path.getmtime)
def generate_pivot_table(original_table, index, columns, values):
    """Pivot the entries of all reports into one wide summary table.

    Each item of *original_table* must expose an ``Entries`` iterable whose
    elements provide a ``dict()`` method. The flattened records are pivoted so
    that *index* labels the rows, the distinct values of *columns* become the
    column headers (kept in order of first appearance), and *values* fills the
    cells. Cells without data are filled with "No Response".

    Returns:
        A DataFrame with *index* restored as a regular first column.
    """
    # Flatten every entry of every report into a single record stream.
    entry_records = (
        entry.dict()
        for report in original_table
        for entry in report.Entries
    )
    flat_df = pd.json_normalize(entry_records)

    # Remember first-appearance order, because pivot() sorts the headers.
    first_seen_order = flat_df[columns].drop_duplicates().tolist()

    pivoted = flat_df.pivot(index=index, columns=columns, values=values)
    reordered = pivoted.reindex(columns=first_seen_order)
    return reordered.reset_index().fillna("No Response")