ChatCausalGPT committed on
Commit
e8a7b1a
·
1 Parent(s): bc8daa2
Files changed (1) hide show
  1. main.py +43 -25
main.py CHANGED
@@ -3,6 +3,10 @@ from openpyxl import load_workbook
3
  from datetime import datetime, timedelta
4
  import os # Added for path manipulation
5
  import tempfile # Import tempfile
 
 
 
 
6
 
7
  def process_files(file1_path, file2_path, output_filename="generated_report.xlsx"):
8
  """
@@ -19,27 +23,34 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
19
  str: The full path to the generated output Excel file in a temporary directory.
20
  Returns None if an error occurs during processing.
21
  """
 
22
  try:
23
  # 读取第一个文件
 
24
  header_df = pd.read_excel(file1_path, sheet_name='HEADER')
 
25
  dimension_df = pd.read_excel(file1_path, sheet_name='Dimension', skiprows=12)
26
  dimension_df.columns = dimension_df.iloc[0]
27
  dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
 
28
  sand_df = pd.read_excel(file1_path, sheet_name='Sand', header=None)
 
29
 
30
  # 读取第二个文件
 
31
  wb = load_workbook(file2_path)
32
 
33
  # Check if 'WACKER' sheet exists
34
  if 'WACKER' not in wb.sheetnames:
35
- print("Error: Template file must contain a sheet named 'WACKER'.")
36
  return None # Indicate error
37
  wacker_sheet = wb['WACKER']
38
-
39
 
40
  # 获取Sales Order Quantity和Quality Assured By
41
  sales_order_quantity = header_df.iloc[5, 2]
42
  quality_assured_by = header_df.iloc[3, 7]
 
43
 
44
  # 定义元素和行号的对应关系 (Copied from original script)
45
  element_row_mapping = {
@@ -51,7 +62,7 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
51
  'Mg': 10, 'Mn': 11, 'Na': 12, 'Ti': 13, 'Zr': 14
52
  }
53
 
54
-
55
  # 遍历Dimension表格中的每个Customer ID
56
  for index, row in dimension_df.iterrows():
57
  customer_id = row['Customer ID']
@@ -61,9 +72,11 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
61
 
62
  # Handle potential NaN or empty Customer ID
63
  if pd.isna(customer_id) or not str(customer_id).strip():
64
- print(f"Skipping row {index+14} due to missing or invalid Customer ID.") # +14 accounts for header rows skipped
65
  continue
66
 
 
 
67
  inspection_date_str = ""
68
  inspection_date = None # Initialize inspection_date
69
  try:
@@ -74,10 +87,9 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
74
  inspection_date = pd.to_datetime(row['Inspection Date'])
75
  inspection_date_str = inspection_date.strftime('%Y-%m-%d')
76
  except Exception as e:
77
- print(f"Warning: Could not parse Inspection Date for Customer ID {customer_id}: {e}. Skipping date fields.")
78
  # inspection_date remains None
79
 
80
-
81
  new_sheet_title = safe_customer_id
82
  # Avoid duplicate sheet names if safe_customer_id becomes the same for different original IDs
83
  sheet_count = 1
@@ -101,7 +113,6 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
101
  new_sheet['B5'] = inspection_date_str
102
  new_sheet['D5'] = (inspection_date + timedelta(days=730)).strftime('%Y-%m-%d')
103
 
104
-
105
  # 从sand表中获取当前customer_id的数据
106
  sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
107
  if not sand_rows.empty:
@@ -115,14 +126,13 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
115
  if value is not None and not pd.isna(value):
116
  new_sheet[f'D{target_row}'] = value
117
  else:
118
- print(f"Warning: Missing or invalid sand data for {element}, Customer ID {customer_id}, Col Index {source_col}")
119
  # Optionally fill with a default value or leave blank
120
  # new_sheet[f'D{target_row}'] = "N/A"
121
  except KeyError:
122
- print(f"Warning: Column index {source_col} not found in sand_row for {element}, Customer ID {customer_id}")
123
  except Exception as e:
124
- print(f"Error filling element {element} for Customer ID {customer_id}: {e}")
125
-
126
 
127
  # 填充Analysis result/分析结果 (with added error handling)
128
  dim_mapping = {
@@ -137,19 +147,21 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
137
  if value is not None and not pd.isna(value):
138
  new_sheet[f'D{target_row}'] = value
139
  else:
140
- print(f"Warning: Missing or invalid dimension data for {source_col_name}, Customer ID {customer_id}")
141
  # Optionally fill with a default value or leave blank
142
  # new_sheet[f'D{target_row}'] = "N/A"
143
  except KeyError:
144
- print(f"Warning: Column '{source_col_name}' not found in dimension_df for Customer ID {customer_id}")
145
  except Exception as e:
146
- print(f"Error filling dimension {source_col_name} for Customer ID {customer_id}: {e}")
147
 
148
  # 保持"批准人:"文本,并在其后添加名字
149
  new_sheet['D29'] = f"批准人:{quality_assured_by}"
 
150
 
151
  # Remove the original template sheet if it exists and wasn't intended to be kept
152
  if 'WACKER' in wb.sheetnames:
 
153
  del wb['WACKER'] # Remove template if no longer needed
154
 
155
  # Create a temporary file path for the output
@@ -163,41 +175,47 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
163
  # Let's use a predictable name within the temp dir, might be easier for Gradio/platform
164
  temp_output_path = os.path.join(temp_dir, safe_output_filename)
165
 
166
- print(f"Attempting to save report to temporary path: {temp_output_path}")
167
  wb.save(temp_output_path)
168
- print(f"Successfully saved report to: {temp_output_path}")
169
  return temp_output_path # Return the full path to the temporary file
170
  except Exception as save_error:
171
- print(f"Error saving workbook to temporary path {temp_output_path}: {save_error}")
172
- import traceback
173
- print(traceback.format_exc())
174
  return None
175
 
176
  except FileNotFoundError:
177
- print(f"Error: Input file not found. Check paths: {file1_path}, {file2_path}")
178
  return None
179
  except KeyError as e:
180
- print(f"Error: Missing expected column or sheet name: {e}. Check input file formats.")
181
  return None
182
  except Exception as e:
183
  # Log other unexpected errors
184
- import traceback
185
- print(f"An unexpected error occurred in process_files: {e}")
186
- print(traceback.format_exc())
187
  return None
188
 
189
 
190
  # Keep the original script behavior if run directly (optional)
191
  if __name__ == "__main__":
 
 
 
 
 
 
192
  # Define default input/output files for direct execution
193
  default_file1 = '1.xls'
194
  default_file2 = '2.xlsx'
195
  default_output = '2_updated.xlsx'
196
 
197
  print(f"Running script directly. Processing {default_file1} and {default_file2}...")
 
 
198
  output_path = process_files(default_file1, default_file2, default_output)
199
 
200
  if output_path:
201
  print(f"Report generated successfully: {output_path}")
 
202
  else:
203
- print("Report generation failed.")
 
 
3
  from datetime import datetime, timedelta
4
  import os # Added for path manipulation
5
  import tempfile # Import tempfile
6
+ import logging # Import logging
7
+
8
+ # Get logger for this module
9
+ logger = logging.getLogger(__name__)
10
 
11
  def process_files(file1_path, file2_path, output_filename="generated_report.xlsx"):
12
  """
 
23
  str: The full path to the generated output Excel file in a temporary directory.
24
  Returns None if an error occurs during processing.
25
  """
26
+ logger.info(f"Starting report generation with input files: {file1_path}, {file2_path}")
27
  try:
28
  # 读取第一个文件
29
+ logger.info(f"Reading header from {file1_path}")
30
  header_df = pd.read_excel(file1_path, sheet_name='HEADER')
31
+ logger.info(f"Reading dimension data from {file1_path}")
32
  dimension_df = pd.read_excel(file1_path, sheet_name='Dimension', skiprows=12)
33
  dimension_df.columns = dimension_df.iloc[0]
34
  dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
35
+ logger.info(f"Reading sand data from {file1_path}")
36
  sand_df = pd.read_excel(file1_path, sheet_name='Sand', header=None)
37
+ logger.info(f"Finished reading data from {file1_path}")
38
 
39
  # 读取第二个文件
40
+ logger.info(f"Loading template workbook from {file2_path}")
41
  wb = load_workbook(file2_path)
42
 
43
  # Check if 'WACKER' sheet exists
44
  if 'WACKER' not in wb.sheetnames:
45
+ logger.error(f"Template file '{file2_path}' must contain a sheet named 'WACKER'.")
46
  return None # Indicate error
47
  wacker_sheet = wb['WACKER']
48
+ logger.info("Template workbook loaded successfully.")
49
 
50
  # 获取Sales Order Quantity和Quality Assured By
51
  sales_order_quantity = header_df.iloc[5, 2]
52
  quality_assured_by = header_df.iloc[3, 7]
53
+ logger.info(f"Retrieved Sales Order Qty: {sales_order_quantity}, Quality Assured By: {quality_assured_by}")
54
 
55
  # 定义元素和行号的对应关系 (Copied from original script)
56
  element_row_mapping = {
 
62
  'Mg': 10, 'Mn': 11, 'Na': 12, 'Ti': 13, 'Zr': 14
63
  }
64
 
65
+ logger.info(f"Processing {len(dimension_df)} entries from dimension data.")
66
  # 遍历Dimension表格中的每个Customer ID
67
  for index, row in dimension_df.iterrows():
68
  customer_id = row['Customer ID']
 
72
 
73
  # Handle potential NaN or empty Customer ID
74
  if pd.isna(customer_id) or not str(customer_id).strip():
75
+ logger.warning(f"Skipping row {index+14} due to missing or invalid Customer ID.")
76
  continue
77
 
78
+ logger.debug(f"Processing Customer ID: {customer_id} (Index: {index})") # Use debug for per-item processing
79
+
80
  inspection_date_str = ""
81
  inspection_date = None # Initialize inspection_date
82
  try:
 
87
  inspection_date = pd.to_datetime(row['Inspection Date'])
88
  inspection_date_str = inspection_date.strftime('%Y-%m-%d')
89
  except Exception as e:
90
+ logger.warning(f"Could not parse Inspection Date for Customer ID {customer_id}: {e}. Skipping date fields.")
91
  # inspection_date remains None
92
 
 
93
  new_sheet_title = safe_customer_id
94
  # Avoid duplicate sheet names if safe_customer_id becomes the same for different original IDs
95
  sheet_count = 1
 
113
  new_sheet['B5'] = inspection_date_str
114
  new_sheet['D5'] = (inspection_date + timedelta(days=730)).strftime('%Y-%m-%d')
115
 
 
116
  # 从sand表中获取当前customer_id的数据
117
  sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
118
  if not sand_rows.empty:
 
126
  if value is not None and not pd.isna(value):
127
  new_sheet[f'D{target_row}'] = value
128
  else:
129
+ logger.warning(f"Missing or invalid sand data for {element}, Customer ID {customer_id}, Col Index {source_col}")
130
  # Optionally fill with a default value or leave blank
131
  # new_sheet[f'D{target_row}'] = "N/A"
132
  except KeyError:
133
+ logger.warning(f"Column index {source_col} not found in sand_row for {element}, Customer ID {customer_id}")
134
  except Exception as e:
135
+ logger.error(f"Error filling element {element} for Customer ID {customer_id}: {e}")
 
136
 
137
  # 填充Analysis result/分析结果 (with added error handling)
138
  dim_mapping = {
 
147
  if value is not None and not pd.isna(value):
148
  new_sheet[f'D{target_row}'] = value
149
  else:
150
+ logger.warning(f"Missing or invalid dimension data for {source_col_name}, Customer ID {customer_id}")
151
  # Optionally fill with a default value or leave blank
152
  # new_sheet[f'D{target_row}'] = "N/A"
153
  except KeyError:
154
+ logger.warning(f"Column '{source_col_name}' not found in dimension_df for Customer ID {customer_id}")
155
  except Exception as e:
156
+ logger.error(f"Error filling dimension {source_col_name} for Customer ID {customer_id}: {e}")
157
 
158
  # 保持"批准人:"文本,并在其后添加名字
159
  new_sheet['D29'] = f"批准人:{quality_assured_by}"
160
+ logger.debug(f"Finished processing data for Customer ID: {customer_id}") # Use debug
161
 
162
  # Remove the original template sheet if it exists and wasn't intended to be kept
163
  if 'WACKER' in wb.sheetnames:
164
+ logger.info("Removing 'WACKER' template sheet from the output workbook.")
165
  del wb['WACKER'] # Remove template if no longer needed
166
 
167
  # Create a temporary file path for the output
 
175
  # Let's use a predictable name within the temp dir, might be easier for Gradio/platform
176
  temp_output_path = os.path.join(temp_dir, safe_output_filename)
177
 
178
+ logger.info(f"Attempting to save report to temporary path: {temp_output_path}")
179
  wb.save(temp_output_path)
180
+ logger.info(f"Successfully saved report to: {temp_output_path}")
181
  return temp_output_path # Return the full path to the temporary file
182
  except Exception as save_error:
183
+ logger.exception(f"Error saving workbook to temporary path {temp_output_path}") # Log exception
 
 
184
  return None
185
 
186
  except FileNotFoundError:
187
+ logger.error(f"Error: Input file not found. Check paths: {file1_path}, {file2_path}")
188
  return None
189
  except KeyError as e:
190
+ logger.exception(f"Error: Missing expected column or sheet name: {e}. Check input file formats.")
191
  return None
192
  except Exception as e:
193
  # Log other unexpected errors
194
+ logger.exception(f"An unexpected error occurred in process_files: {e}")
 
 
195
  return None
196
 
197
 
198
  # Keep the original script behavior if run directly (optional)
199
  if __name__ == "__main__":
200
+ # Configure basic logging for direct script execution if needed
201
+ # Note: app.py usually handles the main config when run via Gradio
202
+ if not logging.getLogger().hasHandlers(): # Only configure if not already configured by app.py import
203
+ log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
204
+ logging.basicConfig(level=logging.INFO, format=log_format)
205
+
206
  # Define default input/output files for direct execution
207
  default_file1 = '1.xls'
208
  default_file2 = '2.xlsx'
209
  default_output = '2_updated.xlsx'
210
 
211
  print(f"Running script directly. Processing {default_file1} and {default_file2}...")
212
+ # Use logger here too if desired, but print might be fine for direct runs
213
+ logger.info(f"Running script directly. Processing {default_file1} and {default_file2}...")
214
  output_path = process_files(default_file1, default_file2, default_output)
215
 
216
  if output_path:
217
  print(f"Report generated successfully: {output_path}")
218
+ logger.info(f"Direct run: Report generated successfully: {output_path}")
219
  else:
220
+ print("Report generation failed.")
221
+ logger.error("Direct run: Report generation failed.")