elaineaishophouse's picture
Upload 15 files
441d880 verified
raw
history blame
5.19 kB
from collections import OrderedDict
from datetime import datetime
import pandas as pd
import os
def read_text_file(file_path, encoding=None):
    """Return the full contents of a text file as a single string.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding passed to ``open``. The default ``None``
            keeps the platform/locale default, which is what this function
            always used; pass e.g. ``"utf-8"`` to read a file reliably
            regardless of the machine's locale.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in the encoding used.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()
# FILTER FUNCTION
def filter_profiles_by_input(profiles, data_dictionary):
    """Interactively narrow ``profiles`` one column filter at a time.

    The user is prompted on stdin for a column name and then for a value.
    A profile is kept when the value occurs, case-insensitively, as a
    substring of ``str(profile.get_attributes().get(column, ""))``. Each
    column can be used for at most one successful filter. A filter that
    would leave no profiles is discarded and the previous result is kept.

    Args:
        profiles: Sequence of objects exposing ``get_attributes()``, which
            must return a mapping supporting ``.get``.
        data_dictionary: Object exposing ``get_columns()``, which must
            return a non-empty iterable of column names.

    Returns:
        The list of profiles that passed every applied filter. When no
        filter is applied, the original ``profiles`` object is returned.

    Raises:
        RuntimeError: If ``get_columns()`` raises (the original exception is
            attached as the cause) or returns no columns.
    """
    print("\n=== FILTER SETTINGS ===")

    # Keep the try body down to the one call that may fail, so the
    # "no columns" error raised below is not caught and re-wrapped with a
    # misleading "Failed to retrieve" prefix.
    try:
        dd_columns = data_dictionary.get_columns()
    except Exception as e:
        raise RuntimeError(
            f"Failed to retrieve columns from data dictionary: {e}"
        ) from e
    if not dd_columns:
        raise RuntimeError("Data dictionary returned no columns.")

    print("Available columns:")
    for col in sorted(dd_columns):
        print(f" • {col}")

    remaining_columns = set(dd_columns)
    filtered_profiles = profiles

    while True:
        if not remaining_columns:
            print("\nNo more columns available for filtering.")
            break

        print("\nColumns available to filter on:")
        for col in sorted(remaining_columns):
            print(f" • {col}")

        column = input("\nEnter column name to filter (press Enter to finish): ").strip()
        if not column:
            break  # Stop filtering when user presses Enter

        if column not in remaining_columns:
            print(f"\nError: Column '{column}' not found or already used for filtering.")
            continue

        value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()
        if not value:
            print("\nNo value entered. Skipping this filter.")
            continue

        # Hoist the lowered needle; it is invariant across profiles.
        needle = value.lower()
        new_filtered_profiles = [
            profile for profile in filtered_profiles
            if needle in str(profile.get_attributes().get(column, "")).lower()
        ]

        if not new_filtered_profiles:
            # Do not commit an empty result; the column stays available.
            print(f"\nNo matches for '{column}' containing '{value}'. Returning to previous state.")
            continue

        filtered_profiles = new_filtered_profiles
        remaining_columns.remove(column)

        print(f"\nFound {len(filtered_profiles)} matching profiles")
        print(f"Profiles filtered out: {len(profiles) - len(filtered_profiles)}")

        confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
        while confirm not in ['yes', 'no']:
            confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()
        if confirm == 'no':
            break

    return filtered_profiles
def generate_file_excerpt(file_path, pattern, max_chars=5000):
    """Build an excerpt from the lines of a file that start with ``pattern``.

    A line is kept when it starts with ``pattern`` and contains at least six
    whitespace-separated tokens (the tokens of ``pattern`` itself count).
    The leading ``pattern`` is removed, the remainder is stripped, and the
    kept lines are joined with newlines.

    Args:
        file_path: Path of the text file to scan.
        pattern: Prefix a line must start with to be included.
        max_chars: Maximum length of the returned excerpt.

    Returns:
        The LAST ``max_chars`` characters of the joined text (the tail, not
        the head), or an empty string when ``max_chars`` is not positive.
    """
    prefix_len = len(pattern)
    with open(file_path, 'r') as file:
        # Slice off only the leading prefix: str.replace would also delete
        # any later occurrence of the pattern inside the line's content.
        extracted_lines = [
            line[prefix_len:].strip()
            for line in file
            if line.startswith(pattern) and len(line.split()) >= 6
        ]

    full_text = '\n'.join(extracted_lines)

    # full_text[-0:] is the whole string, so a non-positive limit needs an
    # explicit guard to actually yield an empty excerpt.
    if max_chars <= 0:
        return ''
    return full_text[-max_chars:]
def generate_dict_from_file(file_name, column_name1, column_name2):
    """Map the values of one Excel column to the values of another.

    Only the two named columns are loaded, using the openpyxl engine. Rows
    are paired positionally; if a key occurs more than once, the value from
    the last occurrence is kept while the key keeps its first position.

    Args:
        file_name: Path of the Excel workbook to read.
        column_name1: Header of the column supplying the keys.
        column_name2: Header of the column supplying the values.

    Returns:
        An ``OrderedDict`` in row order.
    """
    wanted = [column_name1, column_name2]
    frame = pd.read_excel(file_name, usecols=wanted, engine='openpyxl')

    mapping = OrderedDict()
    for key, value in zip(frame[column_name1], frame[column_name2]):
        mapping[key] = value
    return mapping
def find_latest_timestamped_file(directory, filename_pattern):
    """Return the most recently modified matching entry in a directory.

    An entry matches when its name ends with ``filename_pattern``; this is
    a plain suffix test, not a glob or regex. "Latest" is decided by the
    filesystem modification time, not by any timestamp in the name.

    Args:
        directory: The directory to search.
        filename_pattern: Suffix a name must end with
            (e.g., "interview_results.xlsx").

    Returns:
        The joined path of the newest matching entry, or None (after
        printing a notice) when nothing in the directory matches.
    """
    candidates = [name for name in os.listdir(directory) if name.endswith(filename_pattern)]

    if candidates:
        def modified_at(name):
            return os.path.getmtime(os.path.join(directory, name))

        newest = max(candidates, key=modified_at)
        return os.path.join(directory, newest)

    print(f"Unable to find file with {filename_pattern} in {directory}")
    return None
def generate_pivot_table(original_table, index, columns, values):
    """Pivot the entries of a collection of reports into a wide table.

    Every ``entry.dict()`` of every ``report.Entries`` in ``original_table``
    is flattened into one long DataFrame. That frame is pivoted so that
    ``index`` labels the rows, the distinct values of ``columns`` become
    the column headers and ``values`` fills the cells.

    Args:
        original_table: Iterable of reports; each must expose ``Entries``,
            an iterable of objects with a ``dict()`` method.
        index: Field name used as the row identifier.
        columns: Field name whose distinct values become columns.
        values: Field name supplying the cell contents.

    Returns:
        A DataFrame with ``index`` restored as a regular column, the pivoted
        columns in order of first appearance in the input, and missing
        cells filled with "No Response".
    """
    # Long format: one row per entry across all reports.
    records = (entry.dict() for report in original_table for entry in report.Entries)
    long_frame = pd.json_normalize(records)

    # pivot() sorts the new column labels, so remember first-seen order.
    first_seen_order = long_frame[columns].drop_duplicates().tolist()

    wide_frame = long_frame.pivot(index=index, columns=columns, values=values)
    wide_frame = wide_frame.reindex(columns=first_seen_order)
    wide_frame = wide_frame.reset_index()
    return wide_frame.fillna("No Response")