Spaces:
Build error
Build error
File size: 5,185 Bytes
441d880 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
from collections import OrderedDict
from datetime import datetime
import pandas as pd
import os
def read_text_file(file_path):
    """Return the entire contents of a text file as a single string.

    Args:
        file_path: Path of the file to read.

    Returns:
        The full text of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale encoding of the machine the code happens to run on.
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()
# FILTER FUNCTION
def filter_profiles_by_input(profiles, data_dictionary):
    """Interactively narrow ``profiles`` down, one column filter at a time.

    The user is prompted on stdin for a column name and a value. A profile is
    kept when the value occurs (case-insensitively, as a substring) in the
    string form of ``profile.get_attributes().get(column, "")``. A filter that
    matches nothing is discarded and the previous result is kept. Each column
    can be used for at most one successful filter.

    Args:
        profiles: Iterable of objects exposing ``get_attributes()``, which
            must return a mapping with a ``get`` method.
        data_dictionary: Object exposing ``get_columns()``, which returns the
            column names that may be filtered on.

    Returns:
        The list of profiles that passed every applied filter. When no filter
        was applied, the ``profiles`` object itself is returned unchanged.

    Raises:
        RuntimeError: If ``get_columns()`` fails or returns nothing. The
            underlying exception is available as ``__cause__``.
    """
    print("\n=== FILTER SETTINGS ===")

    # Get column names from the data dictionary.
    try:
        dd_columns = data_dictionary.get_columns()
        if not dd_columns:
            raise RuntimeError("Data dictionary returned no columns.")
    except Exception as e:
        # Chain explicitly so the original failure stays attached as the
        # cause instead of being reported as an incidental context.
        raise RuntimeError(
            f"Failed to retrieve columns from data dictionary: {str(e)}"
        ) from e

    print("Available columns:")
    for col in sorted(dd_columns):
        print(f" • {col}")

    remaining_columns = set(dd_columns)
    filtered_profiles = profiles

    while True:
        if not remaining_columns:
            print("\nNo more columns available for filtering.")
            break

        print("\nColumns available to filter on:")
        for col in sorted(remaining_columns):
            print(f" • {col}")

        column = input("\nEnter column name to filter (press Enter to finish): ").strip()
        if not column:
            break  # Stop filtering when the user presses Enter.

        if column not in remaining_columns:
            print(f"\nError: Column '{column}' not found or already used for filtering.")
            continue

        value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()
        if not value:
            print("\nNo value entered. Skipping this filter.")
            continue

        # Lower-case the search term once rather than once per profile.
        needle = value.lower()
        new_filtered_profiles = [
            profile for profile in filtered_profiles
            if needle in str(profile.get_attributes().get(column, "")).lower()
        ]

        if not new_filtered_profiles:
            # Keep the previous result; the column stays available for a retry.
            print(f"\nNo matches for '{column}' containing '{value}'. Returning to previous state.")
            continue

        filtered_profiles = new_filtered_profiles
        remaining_columns.remove(column)

        print(f"\nFound {len(filtered_profiles)} matching profiles")
        # The count is relative to the original input, not the previous step.
        print(f"Profiles filtered out: {len(profiles) - len(filtered_profiles)}")

        confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
        while confirm not in ['yes', 'no']:
            confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()
        if confirm == 'no':
            break

    return filtered_profiles
def generate_file_excerpt(file_path, pattern, max_chars=5000):
    """Return the tail of the text gathered from lines starting with ``pattern``.

    A line is kept when it starts with ``pattern`` and has at least six
    whitespace-separated tokens (the pattern itself counts towards those
    tokens when it is not followed by whitespace-free text). ``pattern`` is
    removed from each kept line, the line is stripped, and the kept lines are
    joined with newlines.

    Args:
        file_path: Path of the UTF-8 text file to scan.
        pattern: Prefix a line must start with to be kept.
        max_chars: Maximum number of characters to return. Values of zero or
            less yield an empty string.

    Returns:
        The LAST ``max_chars`` characters of the joined text (the whole text
        when it is shorter than that).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Step 1: Stream the file and keep only the qualifying lines.
    with open(file_path, 'r', encoding='utf-8') as file:
        # NOTE(review): str.replace removes every occurrence of `pattern` in
        # the line, not only the leading one - confirm that is intended.
        extracted_lines = [
            line.replace(pattern, '').strip()
            for line in file
            if line.startswith(pattern) and len(line.split()) >= 6
        ]

    # Step 2: Join all extracted lines into a single string.
    full_text = '\n'.join(extracted_lines)

    # Step 3: Return the trailing max_chars characters. A slice of [-0:]
    # would be the entire string, so non-positive limits are handled first.
    if max_chars <= 0:
        return ''
    return full_text[-max_chars:]
def generate_dict_from_file(file_name, column_name1, column_name2):
    """Build an ordered mapping from two columns of an Excel workbook.

    Args:
        file_name: Path of the Excel file, read with the ``openpyxl`` engine.
        column_name1: Header of the column whose cells become the keys.
        column_name2: Header of the column whose cells become the values.

    Returns:
        An ``OrderedDict`` pairing each ``column_name1`` cell with the
        ``column_name2`` cell of the same row, in row order. When a key
        repeats, the value from the later row replaces the earlier one.
    """
    wanted_columns = [column_name1, column_name2]
    # Only the two requested columns are loaded from the sheet.
    frame = pd.read_excel(file_name, usecols=wanted_columns, engine='openpyxl')

    mapping = OrderedDict()
    for key, answer in zip(frame[column_name1], frame[column_name2]):
        mapping[key] = answer
    return mapping
def find_latest_timestamped_file(directory, filename_pattern):
    """Find the most recently modified file whose name ends with a suffix.

    Only the direct children of ``directory`` are considered, and "latest"
    is decided by modification time (``os.path.getmtime``), not by any
    timestamp embedded in the file name.

    Args:
        directory: The directory to search for files.
        filename_pattern: The suffix a file name must end with
            (e.g., "interview_results.xlsx").

    Returns:
        The path to the most recently modified matching file, or None if no
        matching files were found (a message is printed in that case).

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    candidates = []
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        # Skip sub-directories that merely share the suffix: the caller
        # expects a file it can open.
        if name.endswith(filename_pattern) and os.path.isfile(path):
            candidates.append(path)

    if not candidates:
        print(f"Unable to find file with {filename_pattern} in {directory}")
        return None

    # A single pass with max() is enough; no need to sort every candidate.
    return max(candidates, key=os.path.getmtime)
def generate_pivot_table(original_table, index, columns, values):
    """Pivot the entries of a collection of reports into a wide summary table.

    Args:
        original_table: Iterable of report objects. Each report must expose
            an ``Entries`` iterable whose items provide a ``dict()`` method.
        index: Name of the field that identifies a row of the result.
        columns: Name of the field whose distinct values become the result's
            columns. Presumably a single field name - ``.tolist()`` is called
            on ``df[columns]``, which requires a Series.
        values: Name of the field that fills the cells.

    Returns:
        A DataFrame with ``index`` as a regular column, followed by one
        column per distinct ``columns`` value in order of first appearance.
        Missing cells are filled with the string "No Response".

    Raises:
        ValueError: Raised by ``DataFrame.pivot`` when an (index, columns)
            pair occurs more than once.
    """
    # Step 1: Flatten all entries into a DataFrame. json_normalize is
    # documented to take a dict or a list of dicts, so build a real list
    # instead of handing it a one-shot generator.
    records = [
        entry.dict()
        for report in original_table
        for entry in report.Entries
    ]
    df = pd.json_normalize(records)

    # Step 2: Remember the order in which the 'columns' values first appear
    # (pivot sorts the column labels, which would lose that order).
    original_order = df[columns].drop_duplicates().tolist()

    # Step 3: Pivot the DataFrame.
    summary_df = df.pivot(index=index, columns=columns, values=values)

    # Step 4: Restore the original column order, turn the index back into a
    # column and mark the gaps.
    summary_df = summary_df.reindex(columns=original_order).reset_index().fillna("No Response")

    return summary_df