Spaces:

Alealejandrooo
/

MindBody_VS_Medserv

Sleeping

App Files Files Community

Alealejandrooo committed on May 16, 2024

Commit

62af0a0

verified ·

1 Parent(s): 3cd2d59

updated process.py

Browse files

Files changed (1) hide show

process.py +78 -56

process.py CHANGED Viewed

@@ -4,71 +4,93 @@ import re
 from datetime import timedelta
-def process_data(files_mindbody, files_medserv, tollerance, progress=gr.Progress()):
-    mindbody = load_data(files_mindbody)
-    medserv = load_data(files_medserv)
-    # Split 'Client' names into first name and last name components for both DataFrames
-    medserv[['Last Name', 'First Name']] = medserv['Client'].str.split(',', expand=True)
-    mindbody[['Last Name', 'First Name']] = mindbody['Client'].str.split(',', expand=True)
-    mindbody['DOS'] = pd.to_datetime(mindbody['DOS'], format='%d/%m/%Y')
-      # Split dates if they contain commas in the 'DOS' column of medserv
-    medserv['DOS'] = medserv['DOS'].astype(str)
-    medserv['DOS'] = medserv['DOS'].str.split(',')
-    medserv = medserv.explode('DOS')
-    # Attempt to convert dates using multiple formats
-    formats_to_try = ['%d/%m/%Y', '%Y-%m-%d']  # Add more formats as needed
-    for format_to_try in formats_to_try:
-        try:
-            medserv['DOS'] = pd.to_datetime(medserv['DOS'].str.strip(), format=format_to_try)
-            break  # Break out of loop if conversion succeeds
-        except ValueError:
-            continue  # Continue to next format if conversion fails
-    # Initialize an empty list to store unmatched rows
     unmatched_rows = []
-    rows = len(mindbody)
-    # Iterate through each row in the mindbody DataFrame
-    for idx in progress.tqdm(range(rows), desc='Analyzing files...'):
-        # Extract relevant information from the current row
-        date = mindbody.iloc[idx]['DOS']
-        first_name = mindbody.iloc[idx]['First Name']
-        last_name = mindbody.iloc[idx]['Last Name']
-        # Define the range of dates to search for a match in medserv
-        date_range = [date - timedelta(days=i) for i in range(tollerance, -tollerance-1, -1)]
-        # Remove the time component from the dates in date_range
-        date_range = [d.date() for d in date_range]
-        # Filter medserv based on the date range and name criteria
-        matches = medserv[((medserv['DOS'].dt.date.isin(date_range)) &
-                        ((medserv['First Name'].str.lower() == first_name.lower()) |
-                            (medserv['Last Name'].str.lower() == last_name.lower())))]
-        # If no match is found, append the row to the unmatched_rows list
-        if matches.empty:
-            unmatched_rows.append(mindbody.iloc[idx])
-    # Create a DataFrame from the unmatched_rows list
-    unmatched_df = pd.DataFrame(unmatched_rows, columns=mindbody.columns)
-    # Specify the columns to include in the output Excel file
-    columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']
-    # Format the 'DOS' column to remove time part
-    unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')
-    output_file_path = 'Comparison Results.xlsx'
-    unmatched_df[columns_to_include].to_excel(output_file_path, index=False)
-    return output_file_path

 from datetime import timedelta
+def process_data(files_mindbody, files_medserv, tolerance, progress=gr.tqdm):
+    try:
+        mindbody = load_data(files_mindbody)
+        medserv = load_data(files_medserv)
+    except Exception as e:
+        print(f"An error occurred while loading data: {e}")
+        return None
+    try:
+        # Remove multiple commas from the 'Client' column
+        medserv['Client'] = medserv['Client'].str.replace(r',+', ',', regex=True)
+        mindbody['Client'] = mindbody['Client'].str.replace(r',+', ',', regex=True)
+        # Split 'Client' names into first name and last name components for both DataFrames
+        medserv[['Last Name', 'First Name']] = medserv['Client'].str.split(',', expand=True)
+        mindbody[['Last Name', 'First Name']] = mindbody['Client'].str.split(',', expand=True)
+    except Exception as e:
+        print(f"An error occurred while processing client names: {e}")
+    try:
+        mindbody['DOS'] = pd.to_datetime(mindbody['DOS'], format='%d/%m/%Y')
+    except Exception as e:
+        print(f"An error occurred while converting dates in mindbody: {e}")
+    try:
+        # Split dates if they contain commas in the 'DOS' column of medserv
+        medserv['DOS'] = medserv['DOS'].astype(str)
+        medserv['DOS'] = medserv['DOS'].str.split(',')
+        medserv = medserv.explode('DOS')
+        # Attempt to convert dates using multiple formats
+        formats_to_try = ['%d/%m/%Y', '%Y-%m-%d']  # Add more formats as needed
+        for format_to_try in formats_to_try:
+            try:
+                medserv['DOS'] = pd.to_datetime(medserv['DOS'].str.strip(), format=format_to_try)
+                break  # Break out of loop if conversion succeeds
+            except ValueError:
+                continue  # Continue to next format if conversion fails
+    except Exception as e:
+        print(f"An error occurred while processing dates in medserv: {e}")
     unmatched_rows = []
+    try:
+        rows = len(mindbody)
+        # Iterate through each row in the mindbody DataFrame
+        for idx in progress(range(rows), desc='Analyzing files...'):
+            # Extract relevant information from the current row
+            date = mindbody.iloc[idx]['DOS']
+            first_name = mindbody.iloc[idx]['First Name']
+            last_name = mindbody.iloc[idx]['Last Name']
+            # Define the range of dates to search for a match in medserv
+            date_range = [date - timedelta(days=i) for i in range(tolerance, -tolerance-1, -1)]
+            # Remove the time component from the dates in date_range
+            date_range = [d.date() for d in date_range]
+            # Filter medserv based on the date range and name criteria
+            matches = medserv[((medserv['DOS'].dt.date.isin(date_range)) &
+                               ((medserv['First Name'].str.lower() == first_name.lower()) |
+                                (medserv['Last Name'].str.lower() == last_name.lower())))]
+            # If no match is found, append the row to the unmatched_rows list
+            if matches.empty:
+                unmatched_rows.append(mindbody.iloc[idx])
+    except Exception as e:
+        print(f"An error occurred while analyzing files: {e}")
+    try:
+        # Create a DataFrame from the unmatched_rows list
+        unmatched_df = pd.DataFrame(unmatched_rows, columns=mindbody.columns)
+        # Specify the columns to include in the output Excel file
+        columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']
+        # Format the 'DOS' column to remove time part
+        unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')
+        output_file_path = 'Comparison Results.xlsx'
+        unmatched_df[columns_to_include].to_excel(output_file_path, index=False)
+        return output_file_path
+    except Exception as e:
+        print(f"An error occurred while creating the output file: {e}")
+        return None