Spaces:
Configuration error
Configuration error
Commit
·
e8a7b1a
1
Parent(s):
bc8daa2
quick
Browse files
main.py
CHANGED
|
@@ -3,6 +3,10 @@ from openpyxl import load_workbook
|
|
| 3 |
from datetime import datetime, timedelta
|
| 4 |
import os # Added for path manipulation
|
| 5 |
import tempfile # Import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def process_files(file1_path, file2_path, output_filename="generated_report.xlsx"):
|
| 8 |
"""
|
|
@@ -19,27 +23,34 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 19 |
str: The full path to the generated output Excel file in a temporary directory.
|
| 20 |
Returns None if an error occurs during processing.
|
| 21 |
"""
|
|
|
|
| 22 |
try:
|
| 23 |
# 读取第一个文件
|
|
|
|
| 24 |
header_df = pd.read_excel(file1_path, sheet_name='HEADER')
|
|
|
|
| 25 |
dimension_df = pd.read_excel(file1_path, sheet_name='Dimension', skiprows=12)
|
| 26 |
dimension_df.columns = dimension_df.iloc[0]
|
| 27 |
dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
|
|
|
|
| 28 |
sand_df = pd.read_excel(file1_path, sheet_name='Sand', header=None)
|
|
|
|
| 29 |
|
| 30 |
# 读取第二个文件
|
|
|
|
| 31 |
wb = load_workbook(file2_path)
|
| 32 |
|
| 33 |
# Check if 'WACKER' sheet exists
|
| 34 |
if 'WACKER' not in wb.sheetnames:
|
| 35 |
-
|
| 36 |
return None # Indicate error
|
| 37 |
wacker_sheet = wb['WACKER']
|
| 38 |
-
|
| 39 |
|
| 40 |
# 获取Sales Order Quantity和Quality Assured By
|
| 41 |
sales_order_quantity = header_df.iloc[5, 2]
|
| 42 |
quality_assured_by = header_df.iloc[3, 7]
|
|
|
|
| 43 |
|
| 44 |
# 定义元素和行号的对应关系 (Copied from original script)
|
| 45 |
element_row_mapping = {
|
|
@@ -51,7 +62,7 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 51 |
'Mg': 10, 'Mn': 11, 'Na': 12, 'Ti': 13, 'Zr': 14
|
| 52 |
}
|
| 53 |
|
| 54 |
-
|
| 55 |
# 遍历Dimension表格中的每个Customer ID
|
| 56 |
for index, row in dimension_df.iterrows():
|
| 57 |
customer_id = row['Customer ID']
|
|
@@ -61,9 +72,11 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 61 |
|
| 62 |
# Handle potential NaN or empty Customer ID
|
| 63 |
if pd.isna(customer_id) or not str(customer_id).strip():
|
| 64 |
-
|
| 65 |
continue
|
| 66 |
|
|
|
|
|
|
|
| 67 |
inspection_date_str = ""
|
| 68 |
inspection_date = None # Initialize inspection_date
|
| 69 |
try:
|
|
@@ -74,10 +87,9 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 74 |
inspection_date = pd.to_datetime(row['Inspection Date'])
|
| 75 |
inspection_date_str = inspection_date.strftime('%Y-%m-%d')
|
| 76 |
except Exception as e:
|
| 77 |
-
|
| 78 |
# inspection_date remains None
|
| 79 |
|
| 80 |
-
|
| 81 |
new_sheet_title = safe_customer_id
|
| 82 |
# Avoid duplicate sheet names if safe_customer_id becomes the same for different original IDs
|
| 83 |
sheet_count = 1
|
|
@@ -101,7 +113,6 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 101 |
new_sheet['B5'] = inspection_date_str
|
| 102 |
new_sheet['D5'] = (inspection_date + timedelta(days=730)).strftime('%Y-%m-%d')
|
| 103 |
|
| 104 |
-
|
| 105 |
# 从sand表中获取当前customer_id的数据
|
| 106 |
sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
|
| 107 |
if not sand_rows.empty:
|
|
@@ -115,14 +126,13 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 115 |
if value is not None and not pd.isna(value):
|
| 116 |
new_sheet[f'D{target_row}'] = value
|
| 117 |
else:
|
| 118 |
-
|
| 119 |
# Optionally fill with a default value or leave blank
|
| 120 |
# new_sheet[f'D{target_row}'] = "N/A"
|
| 121 |
except KeyError:
|
| 122 |
-
|
| 123 |
except Exception as e:
|
| 124 |
-
|
| 125 |
-
|
| 126 |
|
| 127 |
# 填充Analysis result/分析结果 (with added error handling)
|
| 128 |
dim_mapping = {
|
|
@@ -137,19 +147,21 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 137 |
if value is not None and not pd.isna(value):
|
| 138 |
new_sheet[f'D{target_row}'] = value
|
| 139 |
else:
|
| 140 |
-
|
| 141 |
# Optionally fill with a default value or leave blank
|
| 142 |
# new_sheet[f'D{target_row}'] = "N/A"
|
| 143 |
except KeyError:
|
| 144 |
-
|
| 145 |
except Exception as e:
|
| 146 |
-
|
| 147 |
|
| 148 |
# 保持"批准人:"文本,并在其后添加名字
|
| 149 |
new_sheet['D29'] = f"批准人:{quality_assured_by}"
|
|
|
|
| 150 |
|
| 151 |
# Remove the original template sheet if it exists and wasn't intended to be kept
|
| 152 |
if 'WACKER' in wb.sheetnames:
|
|
|
|
| 153 |
del wb['WACKER'] # Remove template if no longer needed
|
| 154 |
|
| 155 |
# Create a temporary file path for the output
|
|
@@ -163,41 +175,47 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
|
|
| 163 |
# Let's use a predictable name within the temp dir, might be easier for Gradio/platform
|
| 164 |
temp_output_path = os.path.join(temp_dir, safe_output_filename)
|
| 165 |
|
| 166 |
-
|
| 167 |
wb.save(temp_output_path)
|
| 168 |
-
|
| 169 |
return temp_output_path # Return the full path to the temporary file
|
| 170 |
except Exception as save_error:
|
| 171 |
-
|
| 172 |
-
import traceback
|
| 173 |
-
print(traceback.format_exc())
|
| 174 |
return None
|
| 175 |
|
| 176 |
except FileNotFoundError:
|
| 177 |
-
|
| 178 |
return None
|
| 179 |
except KeyError as e:
|
| 180 |
-
|
| 181 |
return None
|
| 182 |
except Exception as e:
|
| 183 |
# Log other unexpected errors
|
| 184 |
-
|
| 185 |
-
print(f"An unexpected error occurred in process_files: {e}")
|
| 186 |
-
print(traceback.format_exc())
|
| 187 |
return None
|
| 188 |
|
| 189 |
|
| 190 |
# Keep the original script behavior if run directly (optional)
|
| 191 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
# Define default input/output files for direct execution
|
| 193 |
default_file1 = '1.xls'
|
| 194 |
default_file2 = '2.xlsx'
|
| 195 |
default_output = '2_updated.xlsx'
|
| 196 |
|
| 197 |
print(f"Running script directly. Processing {default_file1} and {default_file2}...")
|
|
|
|
|
|
|
| 198 |
output_path = process_files(default_file1, default_file2, default_output)
|
| 199 |
|
| 200 |
if output_path:
|
| 201 |
print(f"Report generated successfully: {output_path}")
|
|
|
|
| 202 |
else:
|
| 203 |
-
print("Report generation failed.")
|
|
|
|
|
|
| 3 |
from datetime import datetime, timedelta
|
| 4 |
import os # Added for path manipulation
|
| 5 |
import tempfile # Import tempfile
|
| 6 |
+
import logging # Import logging
|
| 7 |
+
|
| 8 |
+
# Get logger for this module
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
def process_files(file1_path, file2_path, output_filename="generated_report.xlsx"):
|
| 12 |
"""
|
|
|
|
| 23 |
str: The full path to the generated output Excel file in a temporary directory.
|
| 24 |
Returns None if an error occurs during processing.
|
| 25 |
"""
|
| 26 |
+
logger.info(f"Starting report generation with input files: {file1_path}, {file2_path}")
|
| 27 |
try:
|
| 28 |
# 读取第一个文件
|
| 29 |
+
logger.info(f"Reading header from {file1_path}")
|
| 30 |
header_df = pd.read_excel(file1_path, sheet_name='HEADER')
|
| 31 |
+
logger.info(f"Reading dimension data from {file1_path}")
|
| 32 |
dimension_df = pd.read_excel(file1_path, sheet_name='Dimension', skiprows=12)
|
| 33 |
dimension_df.columns = dimension_df.iloc[0]
|
| 34 |
dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
|
| 35 |
+
logger.info(f"Reading sand data from {file1_path}")
|
| 36 |
sand_df = pd.read_excel(file1_path, sheet_name='Sand', header=None)
|
| 37 |
+
logger.info(f"Finished reading data from {file1_path}")
|
| 38 |
|
| 39 |
# 读取第二个文件
|
| 40 |
+
logger.info(f"Loading template workbook from {file2_path}")
|
| 41 |
wb = load_workbook(file2_path)
|
| 42 |
|
| 43 |
# Check if 'WACKER' sheet exists
|
| 44 |
if 'WACKER' not in wb.sheetnames:
|
| 45 |
+
logger.error(f"Template file '{file2_path}' must contain a sheet named 'WACKER'.")
|
| 46 |
return None # Indicate error
|
| 47 |
wacker_sheet = wb['WACKER']
|
| 48 |
+
logger.info("Template workbook loaded successfully.")
|
| 49 |
|
| 50 |
# 获取Sales Order Quantity和Quality Assured By
|
| 51 |
sales_order_quantity = header_df.iloc[5, 2]
|
| 52 |
quality_assured_by = header_df.iloc[3, 7]
|
| 53 |
+
logger.info(f"Retrieved Sales Order Qty: {sales_order_quantity}, Quality Assured By: {quality_assured_by}")
|
| 54 |
|
| 55 |
# 定义元素和行号的对应关系 (Copied from original script)
|
| 56 |
element_row_mapping = {
|
|
|
|
| 62 |
'Mg': 10, 'Mn': 11, 'Na': 12, 'Ti': 13, 'Zr': 14
|
| 63 |
}
|
| 64 |
|
| 65 |
+
logger.info(f"Processing {len(dimension_df)} entries from dimension data.")
|
| 66 |
# 遍历Dimension表格中的每个Customer ID
|
| 67 |
for index, row in dimension_df.iterrows():
|
| 68 |
customer_id = row['Customer ID']
|
|
|
|
| 72 |
|
| 73 |
# Handle potential NaN or empty Customer ID
|
| 74 |
if pd.isna(customer_id) or not str(customer_id).strip():
|
| 75 |
+
logger.warning(f"Skipping row {index+14} due to missing or invalid Customer ID.")
|
| 76 |
continue
|
| 77 |
|
| 78 |
+
logger.debug(f"Processing Customer ID: {customer_id} (Index: {index})") # Use debug for per-item processing
|
| 79 |
+
|
| 80 |
inspection_date_str = ""
|
| 81 |
inspection_date = None # Initialize inspection_date
|
| 82 |
try:
|
|
|
|
| 87 |
inspection_date = pd.to_datetime(row['Inspection Date'])
|
| 88 |
inspection_date_str = inspection_date.strftime('%Y-%m-%d')
|
| 89 |
except Exception as e:
|
| 90 |
+
logger.warning(f"Could not parse Inspection Date for Customer ID {customer_id}: {e}. Skipping date fields.")
|
| 91 |
# inspection_date remains None
|
| 92 |
|
|
|
|
| 93 |
new_sheet_title = safe_customer_id
|
| 94 |
# Avoid duplicate sheet names if safe_customer_id becomes the same for different original IDs
|
| 95 |
sheet_count = 1
|
|
|
|
| 113 |
new_sheet['B5'] = inspection_date_str
|
| 114 |
new_sheet['D5'] = (inspection_date + timedelta(days=730)).strftime('%Y-%m-%d')
|
| 115 |
|
|
|
|
| 116 |
# 从sand表中获取当前customer_id的数据
|
| 117 |
sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
|
| 118 |
if not sand_rows.empty:
|
|
|
|
| 126 |
if value is not None and not pd.isna(value):
|
| 127 |
new_sheet[f'D{target_row}'] = value
|
| 128 |
else:
|
| 129 |
+
logger.warning(f"Missing or invalid sand data for {element}, Customer ID {customer_id}, Col Index {source_col}")
|
| 130 |
# Optionally fill with a default value or leave blank
|
| 131 |
# new_sheet[f'D{target_row}'] = "N/A"
|
| 132 |
except KeyError:
|
| 133 |
+
logger.warning(f"Column index {source_col} not found in sand_row for {element}, Customer ID {customer_id}")
|
| 134 |
except Exception as e:
|
| 135 |
+
logger.error(f"Error filling element {element} for Customer ID {customer_id}: {e}")
|
|
|
|
| 136 |
|
| 137 |
# 填充Analysis result/分析结果 (with added error handling)
|
| 138 |
dim_mapping = {
|
|
|
|
| 147 |
if value is not None and not pd.isna(value):
|
| 148 |
new_sheet[f'D{target_row}'] = value
|
| 149 |
else:
|
| 150 |
+
logger.warning(f"Missing or invalid dimension data for {source_col_name}, Customer ID {customer_id}")
|
| 151 |
# Optionally fill with a default value or leave blank
|
| 152 |
# new_sheet[f'D{target_row}'] = "N/A"
|
| 153 |
except KeyError:
|
| 154 |
+
logger.warning(f"Column '{source_col_name}' not found in dimension_df for Customer ID {customer_id}")
|
| 155 |
except Exception as e:
|
| 156 |
+
logger.error(f"Error filling dimension {source_col_name} for Customer ID {customer_id}: {e}")
|
| 157 |
|
| 158 |
# 保持"批准人:"文本,并在其后添加名字
|
| 159 |
new_sheet['D29'] = f"批准人:{quality_assured_by}"
|
| 160 |
+
logger.debug(f"Finished processing data for Customer ID: {customer_id}") # Use debug
|
| 161 |
|
| 162 |
# Remove the original template sheet if it exists and wasn't intended to be kept
|
| 163 |
if 'WACKER' in wb.sheetnames:
|
| 164 |
+
logger.info("Removing 'WACKER' template sheet from the output workbook.")
|
| 165 |
del wb['WACKER'] # Remove template if no longer needed
|
| 166 |
|
| 167 |
# Create a temporary file path for the output
|
|
|
|
| 175 |
# Let's use a predictable name within the temp dir, might be easier for Gradio/platform
|
| 176 |
temp_output_path = os.path.join(temp_dir, safe_output_filename)
|
| 177 |
|
| 178 |
+
logger.info(f"Attempting to save report to temporary path: {temp_output_path}")
|
| 179 |
wb.save(temp_output_path)
|
| 180 |
+
logger.info(f"Successfully saved report to: {temp_output_path}")
|
| 181 |
return temp_output_path # Return the full path to the temporary file
|
| 182 |
except Exception as save_error:
|
| 183 |
+
logger.exception(f"Error saving workbook to temporary path {temp_output_path}") # Log exception
|
|
|
|
|
|
|
| 184 |
return None
|
| 185 |
|
| 186 |
except FileNotFoundError:
|
| 187 |
+
logger.error(f"Error: Input file not found. Check paths: {file1_path}, {file2_path}")
|
| 188 |
return None
|
| 189 |
except KeyError as e:
|
| 190 |
+
logger.exception(f"Error: Missing expected column or sheet name: {e}. Check input file formats.")
|
| 191 |
return None
|
| 192 |
except Exception as e:
|
| 193 |
# Log other unexpected errors
|
| 194 |
+
logger.exception(f"An unexpected error occurred in process_files: {e}")
|
|
|
|
|
|
|
| 195 |
return None
|
| 196 |
|
| 197 |
|
| 198 |
# Keep the original script behavior if run directly (optional)
|
| 199 |
if __name__ == "__main__":
|
| 200 |
+
# Configure basic logging for direct script execution if needed
|
| 201 |
+
# Note: app.py usually handles the main config when run via Gradio
|
| 202 |
+
if not logging.getLogger().hasHandlers(): # Only configure if not already configured by app.py import
|
| 203 |
+
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 204 |
+
logging.basicConfig(level=logging.INFO, format=log_format)
|
| 205 |
+
|
| 206 |
# Define default input/output files for direct execution
|
| 207 |
default_file1 = '1.xls'
|
| 208 |
default_file2 = '2.xlsx'
|
| 209 |
default_output = '2_updated.xlsx'
|
| 210 |
|
| 211 |
print(f"Running script directly. Processing {default_file1} and {default_file2}...")
|
| 212 |
+
# Use logger here too if desired, but print might be fine for direct runs
|
| 213 |
+
logger.info(f"Running script directly. Processing {default_file1} and {default_file2}...")
|
| 214 |
output_path = process_files(default_file1, default_file2, default_output)
|
| 215 |
|
| 216 |
if output_path:
|
| 217 |
print(f"Report generated successfully: {output_path}")
|
| 218 |
+
logger.info(f"Direct run: Report generated successfully: {output_path}")
|
| 219 |
else:
|
| 220 |
+
print("Report generation failed.")
|
| 221 |
+
logger.error("Direct run: Report generation failed.")
|