File size: 5,185 Bytes
441d880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

from collections import OrderedDict
from datetime import datetime
import pandas as pd
import os


def read_text_file(file_path, encoding=None):
    """Return the entire contents of a text file as a single string.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding handed to ``open``. The default ``None``
            keeps the platform's default encoding (the previous behaviour);
            pass e.g. ``"utf-8"`` when the file's encoding is known.

    Returns:
        The full content of the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

# FILTER FUNCTION
def filter_profiles_by_input(profiles, data_dictionary):
    """Interactively narrow ``profiles`` down, one column filter at a time.

    The user is repeatedly asked for a column name and a value. A profile is
    kept when the value occurs (case-insensitively, as a substring) in the
    string form of ``profile.get_attributes()[column]``. Attributes that are
    missing or ``None`` never match. A filter that would leave no profiles is
    discarded and the column stays available. Each column can be applied at
    most once.

    Args:
        profiles: Objects exposing ``get_attributes()``, which returns a
            mapping of column name to value.
        data_dictionary: Object exposing ``get_columns()``, which returns the
            column names that may be filtered on.

    Returns:
        The profiles that satisfy every applied filter. If no filter was
        applied, ``profiles`` itself is returned.

    Raises:
        RuntimeError: If ``get_columns()`` fails or returns no columns.
    """
    print("\n=== FILTER SETTINGS ===")

    # Only the lookup itself is guarded, so the "no columns" error below is
    # not swallowed and re-wrapped as a retrieval failure.
    try:
        dd_columns = data_dictionary.get_columns()
    except Exception as e:
        raise RuntimeError(f"Failed to retrieve columns from data dictionary: {str(e)}") from e
    if not dd_columns:
        raise RuntimeError("Data dictionary returned no columns.")

    print("Available columns:")
    for col in sorted(dd_columns):
        print(f"  • {col}")

    remaining_columns = set(dd_columns)
    filtered_profiles = profiles

    while True:
        if not remaining_columns:
            print("\nNo more columns available for filtering.")
            break

        print("\nColumns available to filter on:")
        for col in sorted(remaining_columns):
            print(f"  • {col}")

        column = input("\nEnter column name to filter (press Enter to finish): ").strip()

        if not column:
            break  # Stop filtering when user presses Enter

        if column not in remaining_columns:
            print(f"\nError: Column '{column}' not found or already used for filtering.")
            continue

        value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()

        if not value:
            print("\nNo value entered. Skipping this filter.")
            continue

        needle = value.lower()  # lower-case once instead of once per profile
        new_filtered_profiles = []
        for profile in filtered_profiles:
            cell = profile.get_attributes().get(column)
            # A None value is "no data"; str(None) must not match e.g. "non".
            if cell is not None and needle in str(cell).lower():
                new_filtered_profiles.append(profile)

        if not new_filtered_profiles:
            print(f"\nNo matches for '{column}' containing '{value}'. Returning to previous state.")
            continue

        filtered_profiles = new_filtered_profiles
        remaining_columns.remove(column)

        print(f"\nFound {len(filtered_profiles)} matching profiles")
        print(f"Profiles filtered out: {len(profiles) - len(filtered_profiles)}")

        confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
        while confirm not in ('yes', 'no', 'y', 'n'):
            confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()

        if confirm in ('no', 'n'):
            break

    return filtered_profiles



def generate_file_excerpt(file_path, pattern, max_chars=5000, min_words=6):
    """Build an excerpt from the lines of a file that start with ``pattern``.

    Qualifying lines have the leading ``pattern`` removed, are stripped of
    surrounding whitespace and joined with newlines. The excerpt is the
    *tail* of that text, i.e. its last ``max_chars`` characters.

    Args:
        file_path: Path of the text file to scan.
        pattern: Prefix a line must start with to be included. Only this
            leading occurrence is removed; later occurrences in the same line
            are kept.
        max_chars: Maximum length of the returned excerpt. Values of zero or
            less yield an empty string.
        min_words: Minimum number of whitespace-separated tokens a line must
            contain (counted on the whole line, prefix included) to be kept.

    Returns:
        The last ``max_chars`` characters of the joined lines; ``''`` when no
        line qualifies.
    """
    prefix_len = len(pattern)

    # Stream the file rather than loading every line into memory first.
    with open(file_path, 'r') as file:
        extracted_lines = [
            line[prefix_len:].strip()
            for line in file
            if line.startswith(pattern) and len(line.split()) >= min_words
        ]

    full_text = '\n'.join(extracted_lines)

    # ``full_text[-0:]`` would be the whole string, so guard non-positive sizes.
    if max_chars <= 0:
        return ''
    return full_text[-max_chars:]


def generate_dict_from_file(file_name, column_name1, column_name2):
    """Load two columns of an Excel workbook into an ordered key/value mapping.

    Args:
        file_name: Path of the workbook, read with the ``openpyxl`` engine.
        column_name1: Header of the column providing the keys.
        column_name2: Header of the column providing the values.

    Returns:
        An ``OrderedDict`` in row order. When a key occurs more than once it
        keeps its first position and takes the value of its last occurrence.
    """
    frame = pd.read_excel(
        file_name,
        usecols=[column_name1, column_name2],
        engine='openpyxl',
    )

    key_series = frame[column_name1]
    value_series = frame[column_name2]

    mapping = OrderedDict()
    for key, value in zip(key_series, value_series):
        mapping[key] = value
    return mapping

def find_latest_timestamped_file(directory, filename_pattern):
    """Find the most recently modified file whose name ends with a suffix.

    Args:
        directory: The directory to search (not searched recursively).
        filename_pattern: Suffix the file name must end with
            (e.g., "interview_results.xlsx"). This is a plain ``endswith``
            match, not a glob or regular expression.

    Returns:
        The path to the matching file with the newest modification time, or
        None if no matching files were found.

    Raises:
        OSError: If ``directory`` cannot be listed (e.g. it does not exist).
    """
    candidates = []
    for name in os.listdir(directory):
        if not name.endswith(filename_pattern):
            continue
        path = os.path.join(directory, name)
        # A sub-directory that happens to carry the suffix is not a result file.
        if os.path.isfile(path):
            candidates.append(path)

    if not candidates:
        print(f"Unable to find file with {filename_pattern} in {directory}")
        return None

    # Only the newest entry is needed, so take the max instead of sorting.
    return max(candidates, key=os.path.getmtime)
  
  
def generate_pivot_table(original_table, index, columns, values):
    """Pivot the entries of all reports into one wide summary table.

    Every entry of every report's ``Entries`` is converted with ``.dict()``
    and flattened into a long DataFrame. That frame is pivoted so each
    distinct value of ``columns`` becomes its own column, kept in order of
    first appearance rather than the sorted order ``pivot`` produces.

    Args:
        original_table: Iterable of report objects with an ``Entries``
            attribute whose items provide a ``dict()`` method.
        index: Field whose values identify the rows of the result.
        columns: Field whose distinct values become the result's columns.
        values: Field supplying the cell contents.

    Returns:
        A DataFrame with ``index`` as a regular column followed by one column
        per distinct ``columns`` value; empty cells hold "No Response".
    """
    def _entry_records():
        # One plain-dict record per entry, walking the reports in order.
        for report in original_table:
            for entry in report.Entries:
                yield entry.dict()

    long_frame = pd.json_normalize(_entry_records())

    # Remember the first-seen order before pivot sorts the labels.
    first_seen_order = long_frame[columns].drop_duplicates().tolist()

    wide_frame = long_frame.pivot(index=index, columns=columns, values=values)
    wide_frame = wide_frame.reindex(columns=first_seen_order)
    wide_frame = wide_frame.reset_index()
    return wide_frame.fillna("No Response")