Spaces:
Runtime error
Runtime error
umairahmad89 committed on
Commit ·
f0a94b0
1
Parent(s): 191079a
Add lookup to previous quarter sheet and handle no previous quarter order
Browse files
app.py
CHANGED
|
@@ -6,6 +6,9 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
| 6 |
import numpy as np
|
| 7 |
import tempfile
|
| 8 |
import os
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Load the sentence transformer model
|
| 11 |
model = SentenceTransformer('BAAI/bge-small-en-v1.5')
|
|
@@ -52,7 +55,53 @@ def filter_excel2(excel_path, min_row, max_row, sheetname):
|
|
| 52 |
return data
|
| 53 |
except Exception as e:
|
| 54 |
raise gr.Error(f"Error processing Excel 2: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
def get_embeddings(texts):
    """Encode ``texts`` with the module-level sentence-transformer ``model``.

    Returns whatever ``model.encode`` produces for the given input.
    """
    embeddings = model.encode(texts)
    return embeddings
|
| 58 |
|
|
@@ -109,7 +158,8 @@ def update_excel(excel_path, processed_data, sheetname):
|
|
| 109 |
def process_files(excel1, excel2, min_row1, max_row1, min_row2, max_row2, sheetname):
|
| 110 |
try:
|
| 111 |
gr.Info("Starting processing...")
|
| 112 |
-
|
|
|
|
| 113 |
# Process Excel 1
|
| 114 |
gr.Info("Processing Excel 1...")
|
| 115 |
csv1_data = filter_excel1(excel1, min_row1, max_row1)
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
import tempfile
|
| 8 |
import os
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
|
| 13 |
# Load the sentence transformer model
|
| 14 |
model = SentenceTransformer('BAAI/bge-small-en-v1.5')
|
|
|
|
| 55 |
return data
|
| 56 |
except Exception as e:
|
| 57 |
raise gr.Error(f"Error processing Excel 2: {str(e)}")
|
| 58 |
+
|
| 59 |
+
def sheet_lookup(current_sheet_name, excel_file_path):
    """Fill the current quarter's "CRI Profile Mapping" column from the previous quarter.

    The previous quarter's sheet name is derived from ``current_sheet_name``
    (e.g. ``"1Q2024"`` -> ``"4Q2023"``).  When that sheet exists, each row's
    ``'Monitoring Tool Instance ID-AU'`` value is looked up in it and the
    matching ``"<prev>q CRI Profile Mapping"`` value is written into
    ``"<current>q CRI Profile Mapping"``; IDs without a match get ``'#N/A'``.
    When the previous sheet is missing, the whole column is set to ``'#N/A'``.
    The current sheet is then written back into the same workbook, replacing
    the existing sheet of that name.

    Args:
        current_sheet_name: Sheet name beginning with ``<quarter>Q<year>``,
            where quarter is 1-4 and year has four digits.
        excel_file_path: Path of the workbook that is read and updated in place.

    Raises:
        ValueError: If ``current_sheet_name`` does not start with a valid
            ``<quarter>Q<year>`` prefix.  Raised before the workbook is opened.
        KeyError: If a column needed for the lookup is missing from a sheet.
    """
    # Validate the name first so a malformed sheet name fails fast, without
    # touching the workbook.  Only quarters 1-4 are meaningful: 0 or 5-9 would
    # yield nonsensical previous-sheet names.
    match = re.match(r'([1-4])Q(\d{4})', current_sheet_name)
    if match is None:
        raise ValueError("Invalid sheet name format")

    quarter, year = map(int, match.groups())
    if quarter == 1:
        # Q1 wraps around to Q4 of the preceding year.
        prev_quarter, prev_year = 4, year - 1
    else:
        prev_quarter, prev_year = quarter - 1, year
    prev_sheet_name = f"{prev_quarter}Q{prev_year}"
    result_col = f"{quarter}q CRI Profile Mapping"

    # Read everything we need and close the workbook again *before* the writer
    # below re-opens the same path; leaving the reader open leaks the handle
    # and can make the append fail on platforms that lock open files.
    with pd.ExcelFile(excel_file_path) as xl:
        current_df = xl.parse(current_sheet_name)
        prev_df = (
            xl.parse(prev_sheet_name)
            if prev_sheet_name in xl.sheet_names
            else None
        )

    if prev_df is not None:
        lookup_col = 'Monitoring Tool Instance ID-AU'
        value_col = f"{prev_quarter}q CRI Profile Mapping"

        # NOTE(review): this also removes duplicate-ID rows from the current
        # sheet before it is written back.  Kept as-is for compatibility --
        # confirm that dropping those rows is intended.
        current_df.drop_duplicates(subset=[lookup_col], keep='first', inplace=True)
        # First occurrence wins when the previous sheet repeats an ID.
        prev_df.drop_duplicates(subset=[lookup_col], keep='first', inplace=True)

        # Dictionary gives a one-pass ID -> value lookup.
        lookup_dict = dict(zip(prev_df[lookup_col], prev_df[value_col]))
        current_df[result_col] = current_df[lookup_col].map(lookup_dict).fillna('#N/A')
    else:
        # No previous quarter to look up from: mark every row as unmatched.
        current_df[result_col] = '#N/A'
        print(f"Warning: Previous sheet {prev_sheet_name} not found. Filling {result_col} with '#N/A'")

    # Save the results back to the Excel file, replacing the current sheet.
    with pd.ExcelWriter(excel_file_path, mode='a', if_sheet_exists='replace') as writer:
        current_df.to_excel(writer, sheet_name=current_sheet_name, index=False)

    print(f"Processing complete for sheet {current_sheet_name}")
|
| 105 |
def get_embeddings(texts):
    """Encode ``texts`` with the module-level sentence-transformer ``model``.

    Returns whatever ``model.encode`` produces for the given input.
    """
    embeddings = model.encode(texts)
    return embeddings
|
| 107 |
|
|
|
|
| 158 |
def process_files(excel1, excel2, min_row1, max_row1, min_row2, max_row2, sheetname):
|
| 159 |
try:
|
| 160 |
gr.Info("Starting processing...")
|
| 161 |
+
gr.Info("Doing lookup...")
|
| 162 |
+
sheet_lookup(sheetname, excel2)
|
| 163 |
# Process Excel 1
|
| 164 |
gr.Info("Processing Excel 1...")
|
| 165 |
csv1_data = filter_excel1(excel1, min_row1, max_row1)
|