Spaces:
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,8 +14,7 @@ model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
|
|
| 14 |
|
| 15 |
def clean_column_name(col_name):
|
| 16 |
"""
|
| 17 |
-
Clean column names to
|
| 18 |
-
Converts to lowercase and replaces non-alphanumeric characters with underscores.
|
| 19 |
"""
|
| 20 |
if not isinstance(col_name, str):
|
| 21 |
return str(col_name)
|
|
@@ -38,23 +37,21 @@ def clean_tin_value(val):
|
|
| 38 |
def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
| 39 |
"""
|
| 40 |
Standardize DataFrame column names and data types:
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
- Combines duplicate key columns into one.
|
| 47 |
-
- Forces key columns (tin and employee_name) to be strings.
|
| 48 |
"""
|
| 49 |
-
# Drop
|
| 50 |
middle_name_cols = [col for col in df.columns if 'middle_name' in col.lower()]
|
| 51 |
if middle_name_cols:
|
| 52 |
df = df.drop(columns=middle_name_cols)
|
| 53 |
|
| 54 |
-
# Clean all column names
|
| 55 |
df.columns = [clean_column_name(col) for col in df.columns]
|
| 56 |
|
| 57 |
-
#
|
| 58 |
rename_map = {}
|
| 59 |
for col in df.columns:
|
| 60 |
if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
|
|
@@ -66,7 +63,7 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 66 |
if rename_map:
|
| 67 |
df = df.rename(columns=rename_map)
|
| 68 |
|
| 69 |
-
# Combine duplicate columns
|
| 70 |
if 'salary' in df.columns and list(df.columns).count('salary') > 1:
|
| 71 |
salary_cols = [col for col in df.columns if col == 'salary']
|
| 72 |
df['salary'] = df[salary_cols].bfill(axis=1).iloc[:, 0]
|
|
@@ -76,11 +73,11 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 76 |
df['tin'] = df[tin_cols].bfill(axis=1).iloc[:, 0]
|
| 77 |
df = df.loc[:, ~df.columns.duplicated()]
|
| 78 |
|
| 79 |
-
#
|
| 80 |
if 'employee_name' not in df.columns and 'first_name' in df.columns and 'last_name' in df.columns:
|
| 81 |
df['employee_name'] = df['first_name'].astype(str).str.strip() + ' ' + df['last_name'].astype(str).str.strip()
|
| 82 |
|
| 83 |
-
# Ensure
|
| 84 |
if 'salary' in df.columns:
|
| 85 |
df['salary'] = pd.to_numeric(df['salary'], errors='coerce')
|
| 86 |
if 'tin' in df.columns:
|
|
@@ -92,9 +89,7 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 92 |
|
| 93 |
def analyze_columns(df: pd.DataFrame, filename: str) -> dict:
|
| 94 |
"""
|
| 95 |
-
|
| 96 |
-
Returns a JSON object with details about columns, key columns for merging,
|
| 97 |
-
any data quality issues, and suggested renames.
|
| 98 |
"""
|
| 99 |
try:
|
| 100 |
display_df = df.head(5).copy()
|
|
@@ -102,35 +97,12 @@ def analyze_columns(df: pd.DataFrame, filename: str) -> dict:
|
|
| 102 |
display_df[col] = display_df[col].astype(str)
|
| 103 |
sample_csv = display_df.to_csv(index=False)
|
| 104 |
prompt = f"""
|
| 105 |
-
Analyze this CSV data, which may represent an employee earnings schedule, PAYE figures, or template info
|
| 106 |
-
|
| 107 |
Filename: {filename}
|
| 108 |
Sample data (first 5 rows):
|
| 109 |
{sample_csv}
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
Please analyze the columns in the sample data and identify potential key columns for merging. Also, report any data quality issues and suggest renames to standardize the column names.
|
| 114 |
-
|
| 115 |
-
Respond with ONLY a valid JSON object in the following format:
|
| 116 |
-
|
| 117 |
-
{{
|
| 118 |
-
"subject": "Employee payroll data analysis",
|
| 119 |
-
"columns": [
|
| 120 |
-
{{
|
| 121 |
-
"name": "column_name",
|
| 122 |
-
"type": "string/number/date",
|
| 123 |
-
"description": "Brief description of the column and its likely content."
|
| 124 |
-
}}
|
| 125 |
-
],
|
| 126 |
-
"key_columns": ["List of identified key column names. Prioritize employee identifiers like employee_id, tin, or employee_name."],
|
| 127 |
-
"issues": ["List any data quality issues found, like missing values in important columns."],
|
| 128 |
-
"suggested_renames": {{
|
| 129 |
-
"old_name": "new_name"
|
| 130 |
-
}}
|
| 131 |
-
}}
|
| 132 |
-
|
| 133 |
-
Ensure the JSON response is valid and parsable.
|
| 134 |
"""
|
| 135 |
response = model.generate_content(prompt)
|
| 136 |
response_text = response.text.strip()
|
|
@@ -146,27 +118,14 @@ def analyze_columns(df: pd.DataFrame, filename: str) -> dict:
|
|
| 146 |
st.error(f"JSON parsing error: {str(je)}")
|
| 147 |
st.text("Raw response:")
|
| 148 |
st.text(response_text)
|
| 149 |
-
return {
|
| 150 |
-
"subject": "Error parsing analysis",
|
| 151 |
-
"columns": [],
|
| 152 |
-
"key_columns": [],
|
| 153 |
-
"issues": ["Error analyzing columns"],
|
| 154 |
-
"suggested_renames": {},
|
| 155 |
-
}
|
| 156 |
except Exception as e:
|
| 157 |
st.error(f"Error in column analysis: {str(e)}")
|
| 158 |
-
return {
|
| 159 |
-
"subject": "Error in analysis",
|
| 160 |
-
"columns": [],
|
| 161 |
-
"key_columns": [],
|
| 162 |
-
"issues": [str(e)],
|
| 163 |
-
"suggested_renames": {},
|
| 164 |
-
}
|
| 165 |
|
| 166 |
def read_excel_file(file) -> pd.DataFrame:
|
| 167 |
"""
|
| 168 |
Read an Excel file with error handling.
|
| 169 |
-
Tries openpyxl first and falls back to xlrd.
|
| 170 |
"""
|
| 171 |
try:
|
| 172 |
return pd.read_excel(file, engine="openpyxl")
|
|
@@ -179,19 +138,17 @@ def read_excel_file(file) -> pd.DataFrame:
|
|
| 179 |
|
| 180 |
def merge_with_master(processed_files):
|
| 181 |
"""
|
| 182 |
-
Merge
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
3. Check that the merged earnings-template data has a 'tin' column populated.
|
| 188 |
-
If present, merge the resulting DataFrame with the PAYE file using 'tin'.
|
| 189 |
"""
|
| 190 |
earnings_file = None
|
| 191 |
paye_file = None
|
| 192 |
template_file = None
|
| 193 |
|
| 194 |
-
# Identify files
|
| 195 |
for file_info in processed_files:
|
| 196 |
lower_filename = file_info["filename"].lower()
|
| 197 |
if "earnings" in lower_filename:
|
|
@@ -205,49 +162,43 @@ def merge_with_master(processed_files):
|
|
| 205 |
st.warning("No earnings file found as master. Using the first file as master.")
|
| 206 |
earnings_file = processed_files[0]
|
| 207 |
|
| 208 |
-
#
|
| 209 |
earnings_df = earnings_file["df"]
|
| 210 |
-
# Drop the inaccurate 'tin' column from earnings if it exists.
|
| 211 |
if 'tin' in earnings_df.columns:
|
| 212 |
earnings_df = earnings_df.drop(columns=['tin'])
|
| 213 |
-
# Remove any middle_name column.
|
| 214 |
if 'middle_name' in earnings_df.columns:
|
| 215 |
earnings_df = earnings_df.drop(columns=['middle_name'])
|
| 216 |
|
| 217 |
merged_df = earnings_df.copy()
|
| 218 |
|
| 219 |
-
#
|
| 220 |
if template_file is not None:
|
| 221 |
st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
|
| 222 |
template_df = template_file["df"].copy()
|
| 223 |
-
# Force the first column
|
| 224 |
if not template_df.empty:
|
| 225 |
cols = list(template_df.columns)
|
| 226 |
cols[0] = "tin"
|
| 227 |
template_df.columns = cols
|
| 228 |
-
|
| 229 |
-
# Remove any middle_name column from the template file.
|
| 230 |
if 'middle_name' in template_df.columns:
|
| 231 |
template_df = template_df.drop(columns=['middle_name'])
|
| 232 |
-
#
|
| 233 |
if 'employee_name' not in template_df.columns and 'first_name' in template_df.columns and 'last_name' in template_df.columns:
|
| 234 |
template_df['employee_name'] = template_df['first_name'].astype(str).str.strip() + ' ' + template_df['last_name'].astype(str).str.strip()
|
| 235 |
-
# If after standardization the template still doesn't have employee_name,
|
| 236 |
-
# you may need to construct it manually if possible.
|
| 237 |
if 'employee_name' in merged_df.columns and 'employee_name' in template_df.columns:
|
| 238 |
merged_df = merged_df.merge(template_df, on='employee_name', how='left', suffixes=('', '_template'))
|
| 239 |
else:
|
| 240 |
-
st.warning("Column 'employee_name' missing in either
|
| 241 |
else:
|
| 242 |
-
st.warning("No template file detected. Cannot proceed without a trusted TIN
|
| 243 |
|
| 244 |
-
# Check
|
| 245 |
if 'tin' not in merged_df.columns or merged_df['tin'].isnull().all():
|
| 246 |
st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge. "
|
| 247 |
-
"
|
| 248 |
return merged_df
|
| 249 |
|
| 250 |
-
# Merge PAYE
|
| 251 |
if paye_file is not None:
|
| 252 |
st.write(f"Merging PAYE figures from '{paye_file['filename']}' using key 'tin'.")
|
| 253 |
paye_df = paye_file["df"]
|
|
@@ -262,17 +213,14 @@ def merge_with_master(processed_files):
|
|
| 262 |
|
| 263 |
def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
|
| 264 |
"""
|
| 265 |
-
|
| 266 |
-
and replacing common null placeholders.
|
| 267 |
"""
|
| 268 |
return df.astype(str).replace({"nan": "", "None": ""})
|
| 269 |
|
| 270 |
def main():
|
| 271 |
st.title("Smart CSV Processor")
|
| 272 |
st.write("Upload CSV or Excel files for intelligent analysis and merging.")
|
| 273 |
-
uploaded_files = st.file_uploader(
|
| 274 |
-
"Choose files", accept_multiple_files=True, type=["csv", "xlsx", "xls"]
|
| 275 |
-
)
|
| 276 |
if uploaded_files:
|
| 277 |
st.write("### Processing Files")
|
| 278 |
processed_files = []
|
|
@@ -287,7 +235,6 @@ def main():
|
|
| 287 |
if df.empty:
|
| 288 |
st.warning(f"DataFrame from '{uploaded_file.name}' is empty. Please check the file.")
|
| 289 |
continue
|
| 290 |
-
# Standardize columns and key identifiers.
|
| 291 |
df = standardize_dataframe(df)
|
| 292 |
st.write("Initial Preview:")
|
| 293 |
st.dataframe(df.head())
|
|
@@ -296,12 +243,9 @@ def main():
|
|
| 296 |
if analysis:
|
| 297 |
st.write("Column Analysis:")
|
| 298 |
st.json(analysis)
|
| 299 |
-
# Apply any suggested renames from the analysis.
|
| 300 |
if 'suggested_renames' in analysis:
|
| 301 |
df = df.rename(columns=analysis['suggested_renames'])
|
| 302 |
-
processed_files.append(
|
| 303 |
-
{"filename": uploaded_file.name, "df": df, "analysis": analysis}
|
| 304 |
-
)
|
| 305 |
else:
|
| 306 |
st.error(f"Could not read data from '{uploaded_file.name}'.")
|
| 307 |
except Exception as e:
|
|
@@ -317,12 +261,7 @@ def main():
|
|
| 317 |
st.dataframe(safe_display_df(merged_df.head()))
|
| 318 |
try:
|
| 319 |
csv = merged_df.to_csv(index=False)
|
| 320 |
-
st.download_button(
|
| 321 |
-
label="Download Merged CSV",
|
| 322 |
-
data=csv,
|
| 323 |
-
file_name="merged_data.csv",
|
| 324 |
-
mime="text/csv",
|
| 325 |
-
)
|
| 326 |
st.write("### Dataset Statistics")
|
| 327 |
st.write(f"Total rows: {len(merged_df)}")
|
| 328 |
st.write(f"Total columns: {len(merged_df.columns)}")
|
|
|
|
| 14 |
|
| 15 |
def clean_column_name(col_name):
|
| 16 |
"""
|
| 17 |
+
Clean column names: convert to lowercase, replace non-alphanumeric characters with underscores.
|
|
|
|
| 18 |
"""
|
| 19 |
if not isinstance(col_name, str):
|
| 20 |
return str(col_name)
|
|
|
|
| 37 |
def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
| 38 |
"""
|
| 39 |
Standardize DataFrame column names and data types:
|
| 40 |
+
- Drop any middle name columns.
|
| 41 |
+
- Clean column names (e.g. "Employee Name" becomes "employee_name").
|
| 42 |
+
- Rename synonyms (e.g., "Personal ID of Employee" to "tin").
|
| 43 |
+
- If missing, construct an 'employee_name' column from first and last names.
|
| 44 |
+
- Ensure key columns (tin and employee_name) are strings.
|
|
|
|
|
|
|
| 45 |
"""
|
| 46 |
+
# Drop columns containing 'middle_name'
|
| 47 |
middle_name_cols = [col for col in df.columns if 'middle_name' in col.lower()]
|
| 48 |
if middle_name_cols:
|
| 49 |
df = df.drop(columns=middle_name_cols)
|
| 50 |
|
| 51 |
+
# Clean all column names
|
| 52 |
df.columns = [clean_column_name(col) for col in df.columns]
|
| 53 |
|
| 54 |
+
# Rename synonyms for TIN and salary
|
| 55 |
rename_map = {}
|
| 56 |
for col in df.columns:
|
| 57 |
if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
|
|
|
|
| 63 |
if rename_map:
|
| 64 |
df = df.rename(columns=rename_map)
|
| 65 |
|
| 66 |
+
# Combine duplicate columns if necessary
|
| 67 |
if 'salary' in df.columns and list(df.columns).count('salary') > 1:
|
| 68 |
salary_cols = [col for col in df.columns if col == 'salary']
|
| 69 |
df['salary'] = df[salary_cols].bfill(axis=1).iloc[:, 0]
|
|
|
|
| 73 |
df['tin'] = df[tin_cols].bfill(axis=1).iloc[:, 0]
|
| 74 |
df = df.loc[:, ~df.columns.duplicated()]
|
| 75 |
|
| 76 |
+
# Construct employee_name if missing
|
| 77 |
if 'employee_name' not in df.columns and 'first_name' in df.columns and 'last_name' in df.columns:
|
| 78 |
df['employee_name'] = df['first_name'].astype(str).str.strip() + ' ' + df['last_name'].astype(str).str.strip()
|
| 79 |
|
| 80 |
+
# Ensure proper types for key columns
|
| 81 |
if 'salary' in df.columns:
|
| 82 |
df['salary'] = pd.to_numeric(df['salary'], errors='coerce')
|
| 83 |
if 'tin' in df.columns:
|
|
|
|
| 89 |
|
| 90 |
def analyze_columns(df: pd.DataFrame, filename: str) -> dict:
|
| 91 |
"""
|
| 92 |
+
Use Gemini AI to analyze DataFrame columns and suggest key columns and renames.
|
|
|
|
|
|
|
| 93 |
"""
|
| 94 |
try:
|
| 95 |
display_df = df.head(5).copy()
|
|
|
|
| 97 |
display_df[col] = display_df[col].astype(str)
|
| 98 |
sample_csv = display_df.to_csv(index=False)
|
| 99 |
prompt = f"""
|
| 100 |
+
Analyze this CSV data, which may represent an employee earnings schedule, PAYE figures, or template info for payroll processing.
|
|
|
|
| 101 |
Filename: {filename}
|
| 102 |
Sample data (first 5 rows):
|
| 103 |
{sample_csv}
|
| 104 |
+
Identify potential key columns for merging and suggest renames.
|
| 105 |
+
Respond with a valid JSON object.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
"""
|
| 107 |
response = model.generate_content(prompt)
|
| 108 |
response_text = response.text.strip()
|
|
|
|
| 118 |
st.error(f"JSON parsing error: {str(je)}")
|
| 119 |
st.text("Raw response:")
|
| 120 |
st.text(response_text)
|
| 121 |
+
return {"subject": "Error parsing analysis", "columns": [], "key_columns": [], "issues": ["Error analyzing columns"], "suggested_renames": {}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
except Exception as e:
|
| 123 |
st.error(f"Error in column analysis: {str(e)}")
|
| 124 |
+
return {"subject": "Error in analysis", "columns": [], "key_columns": [], "issues": [str(e)], "suggested_renames": {}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
def read_excel_file(file) -> pd.DataFrame:
|
| 127 |
"""
|
| 128 |
Read an Excel file with error handling.
|
|
|
|
| 129 |
"""
|
| 130 |
try:
|
| 131 |
return pd.read_excel(file, engine="openpyxl")
|
|
|
|
| 138 |
|
| 139 |
def merge_with_master(processed_files):
|
| 140 |
"""
|
| 141 |
+
Merge DataFrames in two steps:
|
| 142 |
+
1. Use the earnings file as master (dropping its inaccurate 'tin').
|
| 143 |
+
2. Merge the template file (which supplies the trusted TIN via its first column)
|
| 144 |
+
with the earnings data using 'employee_name'.
|
| 145 |
+
3. Finally, merge the combined data with the PAYE file using 'tin'.
|
|
|
|
|
|
|
| 146 |
"""
|
| 147 |
earnings_file = None
|
| 148 |
paye_file = None
|
| 149 |
template_file = None
|
| 150 |
|
| 151 |
+
# Identify files by filename keywords
|
| 152 |
for file_info in processed_files:
|
| 153 |
lower_filename = file_info["filename"].lower()
|
| 154 |
if "earnings" in lower_filename:
|
|
|
|
| 162 |
st.warning("No earnings file found as master. Using the first file as master.")
|
| 163 |
earnings_file = processed_files[0]
|
| 164 |
|
| 165 |
+
# Process earnings file: drop its inaccurate TIN column
|
| 166 |
earnings_df = earnings_file["df"]
|
|
|
|
| 167 |
if 'tin' in earnings_df.columns:
|
| 168 |
earnings_df = earnings_df.drop(columns=['tin'])
|
|
|
|
| 169 |
if 'middle_name' in earnings_df.columns:
|
| 170 |
earnings_df = earnings_df.drop(columns=['middle_name'])
|
| 171 |
|
| 172 |
merged_df = earnings_df.copy()
|
| 173 |
|
| 174 |
+
# Process and merge the template file using employee_name
|
| 175 |
if template_file is not None:
|
| 176 |
st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
|
| 177 |
template_df = template_file["df"].copy()
|
| 178 |
+
# Force the first column (Personal ID of Employee) to be 'tin'
|
| 179 |
if not template_df.empty:
|
| 180 |
cols = list(template_df.columns)
|
| 181 |
cols[0] = "tin"
|
| 182 |
template_df.columns = cols
|
|
|
|
|
|
|
| 183 |
if 'middle_name' in template_df.columns:
|
| 184 |
template_df = template_df.drop(columns=['middle_name'])
|
| 185 |
+
# If employee_name is not present, construct it from first_name and last_name
|
| 186 |
if 'employee_name' not in template_df.columns and 'first_name' in template_df.columns and 'last_name' in template_df.columns:
|
| 187 |
template_df['employee_name'] = template_df['first_name'].astype(str).str.strip() + ' ' + template_df['last_name'].astype(str).str.strip()
|
|
|
|
|
|
|
| 188 |
if 'employee_name' in merged_df.columns and 'employee_name' in template_df.columns:
|
| 189 |
merged_df = merged_df.merge(template_df, on='employee_name', how='left', suffixes=('', '_template'))
|
| 190 |
else:
|
| 191 |
+
st.warning("Column 'employee_name' missing in either earnings or template file. Skipping template merge.")
|
| 192 |
else:
|
| 193 |
+
st.warning("No template file detected. Cannot proceed without a trusted TIN.")
|
| 194 |
|
| 195 |
+
# Check for a trusted 'tin' column after merging earnings and template
|
| 196 |
if 'tin' not in merged_df.columns or merged_df['tin'].isnull().all():
|
| 197 |
st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge. "
|
| 198 |
+
"Ensure the template file's first column (Personal ID of Employee) is correctly populated.")
|
| 199 |
return merged_df
|
| 200 |
|
| 201 |
+
# Merge PAYE file using the trusted 'tin'
|
| 202 |
if paye_file is not None:
|
| 203 |
st.write(f"Merging PAYE figures from '{paye_file['filename']}' using key 'tin'.")
|
| 204 |
paye_df = paye_file["df"]
|
|
|
|
| 213 |
|
| 214 |
def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
|
| 215 |
"""
|
| 216 |
+
Convert all entries in the DataFrame to strings and replace common null placeholders.
|
|
|
|
| 217 |
"""
|
| 218 |
return df.astype(str).replace({"nan": "", "None": ""})
|
| 219 |
|
| 220 |
def main():
|
| 221 |
st.title("Smart CSV Processor")
|
| 222 |
st.write("Upload CSV or Excel files for intelligent analysis and merging.")
|
| 223 |
+
uploaded_files = st.file_uploader("Choose files", accept_multiple_files=True, type=["csv", "xlsx", "xls"])
|
|
|
|
|
|
|
| 224 |
if uploaded_files:
|
| 225 |
st.write("### Processing Files")
|
| 226 |
processed_files = []
|
|
|
|
| 235 |
if df.empty:
|
| 236 |
st.warning(f"DataFrame from '{uploaded_file.name}' is empty. Please check the file.")
|
| 237 |
continue
|
|
|
|
| 238 |
df = standardize_dataframe(df)
|
| 239 |
st.write("Initial Preview:")
|
| 240 |
st.dataframe(df.head())
|
|
|
|
| 243 |
if analysis:
|
| 244 |
st.write("Column Analysis:")
|
| 245 |
st.json(analysis)
|
|
|
|
| 246 |
if 'suggested_renames' in analysis:
|
| 247 |
df = df.rename(columns=analysis['suggested_renames'])
|
| 248 |
+
processed_files.append({"filename": uploaded_file.name, "df": df, "analysis": analysis})
|
|
|
|
|
|
|
| 249 |
else:
|
| 250 |
st.error(f"Could not read data from '{uploaded_file.name}'.")
|
| 251 |
except Exception as e:
|
|
|
|
| 261 |
st.dataframe(safe_display_df(merged_df.head()))
|
| 262 |
try:
|
| 263 |
csv = merged_df.to_csv(index=False)
|
| 264 |
+
st.download_button(label="Download Merged CSV", data=csv, file_name="merged_data.csv", mime="text/csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
st.write("### Dataset Statistics")
|
| 266 |
st.write(f"Total rows: {len(merged_df)}")
|
| 267 |
st.write(f"Total columns: {len(merged_df.columns)}")
|