ChatCausalGPT committed on
Commit
e8a7b1a
·
1 Parent(s): bc8daa2
Files changed (1) hide show
  1. main.py +43 -25
main.py CHANGED
@@ -3,6 +3,10 @@ from openpyxl import load_workbook
3
  from datetime import datetime, timedelta
4
  import os # Added for path manipulation
5
  import tempfile # Import tempfile
 
 
 
 
6
 
7
  def process_files(file1_path, file2_path, output_filename="generated_report.xlsx"):
8
  """
@@ -19,27 +23,34 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
19
  str: The full path to the generated output Excel file in a temporary directory.
20
  Returns None if an error occurs during processing.
21
  """
 
22
  try:
23
  # 读取第一个文件
 
24
  header_df = pd.read_excel(file1_path, sheet_name='HEADER')
 
25
  dimension_df = pd.read_excel(file1_path, sheet_name='Dimension', skiprows=12)
26
  dimension_df.columns = dimension_df.iloc[0]
27
  dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
 
28
  sand_df = pd.read_excel(file1_path, sheet_name='Sand', header=None)
 
29
 
30
  # 读取第二个文件
 
31
  wb = load_workbook(file2_path)
32
 
33
  # Check if 'WACKER' sheet exists
34
  if 'WACKER' not in wb.sheetnames:
35
- print("Error: Template file must contain a sheet named 'WACKER'.")
36
  return None # Indicate error
37
  wacker_sheet = wb['WACKER']
38
-
39
 
40
  # 获取Sales Order Quantity和Quality Assured By
41
  sales_order_quantity = header_df.iloc[5, 2]
42
  quality_assured_by = header_df.iloc[3, 7]
 
43
 
44
  # 定义元素和行号的对应关系 (Copied from original script)
45
  element_row_mapping = {
@@ -51,7 +62,7 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
51
  'Mg': 10, 'Mn': 11, 'Na': 12, 'Ti': 13, 'Zr': 14
52
  }
53
 
54
-
55
  # 遍历Dimension表格中的每个Customer ID
56
  for index, row in dimension_df.iterrows():
57
  customer_id = row['Customer ID']
@@ -61,9 +72,11 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
61
 
62
  # Handle potential NaN or empty Customer ID
63
  if pd.isna(customer_id) or not str(customer_id).strip():
64
- print(f"Skipping row {index+14} due to missing or invalid Customer ID.") # +14 accounts for header rows skipped
65
  continue
66
 
 
 
67
  inspection_date_str = ""
68
  inspection_date = None # Initialize inspection_date
69
  try:
@@ -74,10 +87,9 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
74
  inspection_date = pd.to_datetime(row['Inspection Date'])
75
  inspection_date_str = inspection_date.strftime('%Y-%m-%d')
76
  except Exception as e:
77
- print(f"Warning: Could not parse Inspection Date for Customer ID {customer_id}: {e}. Skipping date fields.")
78
  # inspection_date remains None
79
 
80
-
81
  new_sheet_title = safe_customer_id
82
  # Avoid duplicate sheet names if safe_customer_id becomes the same for different original IDs
83
  sheet_count = 1
@@ -101,7 +113,6 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
101
  new_sheet['B5'] = inspection_date_str
102
  new_sheet['D5'] = (inspection_date + timedelta(days=730)).strftime('%Y-%m-%d')
103
 
104
-
105
  # 从sand表中获取当前customer_id的数据
106
  sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
107
  if not sand_rows.empty:
@@ -115,14 +126,13 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
115
  if value is not None and not pd.isna(value):
116
  new_sheet[f'D{target_row}'] = value
117
  else:
118
- print(f"Warning: Missing or invalid sand data for {element}, Customer ID {customer_id}, Col Index {source_col}")
119
  # Optionally fill with a default value or leave blank
120
  # new_sheet[f'D{target_row}'] = "N/A"
121
  except KeyError:
122
- print(f"Warning: Column index {source_col} not found in sand_row for {element}, Customer ID {customer_id}")
123
  except Exception as e:
124
- print(f"Error filling element {element} for Customer ID {customer_id}: {e}")
125
-
126
 
127
  # 填充Analysis result/分析结果 (with added error handling)
128
  dim_mapping = {
@@ -137,19 +147,21 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
137
  if value is not None and not pd.isna(value):
138
  new_sheet[f'D{target_row}'] = value
139
  else:
140
- print(f"Warning: Missing or invalid dimension data for {source_col_name}, Customer ID {customer_id}")
141
  # Optionally fill with a default value or leave blank
142
  # new_sheet[f'D{target_row}'] = "N/A"
143
  except KeyError:
144
- print(f"Warning: Column '{source_col_name}' not found in dimension_df for Customer ID {customer_id}")
145
  except Exception as e:
146
- print(f"Error filling dimension {source_col_name} for Customer ID {customer_id}: {e}")
147
 
148
  # 保持"批准人:"文本,并在其后添加名字
149
  new_sheet['D29'] = f"批准人:{quality_assured_by}"
 
150
 
151
  # Remove the original template sheet if it exists and wasn't intended to be kept
152
  if 'WACKER' in wb.sheetnames:
 
153
  del wb['WACKER'] # Remove template if no longer needed
154
 
155
  # Create a temporary file path for the output
@@ -163,41 +175,47 @@ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx
163
  # Let's use a predictable name within the temp dir, might be easier for Gradio/platform
164
  temp_output_path = os.path.join(temp_dir, safe_output_filename)
165
 
166
- print(f"Attempting to save report to temporary path: {temp_output_path}")
167
  wb.save(temp_output_path)
168
- print(f"Successfully saved report to: {temp_output_path}")
169
  return temp_output_path # Return the full path to the temporary file
170
  except Exception as save_error:
171
- print(f"Error saving workbook to temporary path {temp_output_path}: {save_error}")
172
- import traceback
173
- print(traceback.format_exc())
174
  return None
175
 
176
  except FileNotFoundError:
177
- print(f"Error: Input file not found. Check paths: {file1_path}, {file2_path}")
178
  return None
179
  except KeyError as e:
180
- print(f"Error: Missing expected column or sheet name: {e}. Check input file formats.")
181
  return None
182
  except Exception as e:
183
  # Log other unexpected errors
184
- import traceback
185
- print(f"An unexpected error occurred in process_files: {e}")
186
- print(traceback.format_exc())
187
  return None
188
 
189
 
190
  # Keep the original script behavior if run directly (optional)
191
  if __name__ == "__main__":
 
 
 
 
 
 
192
  # Define default input/output files for direct execution
193
  default_file1 = '1.xls'
194
  default_file2 = '2.xlsx'
195
  default_output = '2_updated.xlsx'
196
 
197
  print(f"Running script directly. Processing {default_file1} and {default_file2}...")
 
 
198
  output_path = process_files(default_file1, default_file2, default_output)
199
 
200
  if output_path:
201
  print(f"Report generated successfully: {output_path}")
 
202
  else:
203
- print("Report generation failed.")
 
 
3
  from datetime import datetime, timedelta
4
  import os # Added for path manipulation
5
  import tempfile # Import tempfile
6
+ import logging # Import logging
7
+
8
+ # Get logger for this module
9
+ logger = logging.getLogger(__name__)
10
 
11
  def process_files(file1_path, file2_path, output_filename="generated_report.xlsx"):
12
  """
 
23
  str: The full path to the generated output Excel file in a temporary directory.
24
  Returns None if an error occurs during processing.
25
  """
26
+ logger.info(f"Starting report generation with input files: {file1_path}, {file2_path}")
27
  try:
28
  # 读取第一个文件
29
+ logger.info(f"Reading header from {file1_path}")
30
  header_df = pd.read_excel(file1_path, sheet_name='HEADER')
31
+ logger.info(f"Reading dimension data from {file1_path}")
32
  dimension_df = pd.read_excel(file1_path, sheet_name='Dimension', skiprows=12)
33
  dimension_df.columns = dimension_df.iloc[0]
34
  dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
35
+ logger.info(f"Reading sand data from {file1_path}")
36
  sand_df = pd.read_excel(file1_path, sheet_name='Sand', header=None)
37
+ logger.info(f"Finished reading data from {file1_path}")
38
 
39
  # 读取第二个文件
40
+ logger.info(f"Loading template workbook from {file2_path}")
41
  wb = load_workbook(file2_path)
42
 
43
  # Check if 'WACKER' sheet exists
44
  if 'WACKER' not in wb.sheetnames:
45
+ logger.error(f"Template file '{file2_path}' must contain a sheet named 'WACKER'.")
46
  return None # Indicate error
47
  wacker_sheet = wb['WACKER']
48
+ logger.info("Template workbook loaded successfully.")
49
 
50
  # 获取Sales Order Quantity和Quality Assured By
51
  sales_order_quantity = header_df.iloc[5, 2]
52
  quality_assured_by = header_df.iloc[3, 7]
53
+ logger.info(f"Retrieved Sales Order Qty: {sales_order_quantity}, Quality Assured By: {quality_assured_by}")
54
 
55
  # 定义元素和行号的对应关系 (Copied from original script)
56
  element_row_mapping = {
 
62
  'Mg': 10, 'Mn': 11, 'Na': 12, 'Ti': 13, 'Zr': 14
63
  }
64
 
65
+ logger.info(f"Processing {len(dimension_df)} entries from dimension data.")
66
  # 遍历Dimension表格中的每个Customer ID
67
  for index, row in dimension_df.iterrows():
68
  customer_id = row['Customer ID']
 
72
 
73
  # Handle potential NaN or empty Customer ID
74
  if pd.isna(customer_id) or not str(customer_id).strip():
75
+ logger.warning(f"Skipping row {index+14} due to missing or invalid Customer ID.")
76
  continue
77
 
78
+ logger.debug(f"Processing Customer ID: {customer_id} (Index: {index})") # Use debug for per-item processing
79
+
80
  inspection_date_str = ""
81
  inspection_date = None # Initialize inspection_date
82
  try:
 
87
  inspection_date = pd.to_datetime(row['Inspection Date'])
88
  inspection_date_str = inspection_date.strftime('%Y-%m-%d')
89
  except Exception as e:
90
+ logger.warning(f"Could not parse Inspection Date for Customer ID {customer_id}: {e}. Skipping date fields.")
91
  # inspection_date remains None
92
 
 
93
  new_sheet_title = safe_customer_id
94
  # Avoid duplicate sheet names if safe_customer_id becomes the same for different original IDs
95
  sheet_count = 1
 
113
  new_sheet['B5'] = inspection_date_str
114
  new_sheet['D5'] = (inspection_date + timedelta(days=730)).strftime('%Y-%m-%d')
115
 
 
116
  # 从sand表中获取当前customer_id的数据
117
  sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
118
  if not sand_rows.empty:
 
126
  if value is not None and not pd.isna(value):
127
  new_sheet[f'D{target_row}'] = value
128
  else:
129
+ logger.warning(f"Missing or invalid sand data for {element}, Customer ID {customer_id}, Col Index {source_col}")
130
  # Optionally fill with a default value or leave blank
131
  # new_sheet[f'D{target_row}'] = "N/A"
132
  except KeyError:
133
+ logger.warning(f"Column index {source_col} not found in sand_row for {element}, Customer ID {customer_id}")
134
  except Exception as e:
135
+ logger.error(f"Error filling element {element} for Customer ID {customer_id}: {e}")
 
136
 
137
  # 填充Analysis result/分析结果 (with added error handling)
138
  dim_mapping = {
 
147
  if value is not None and not pd.isna(value):
148
  new_sheet[f'D{target_row}'] = value
149
  else:
150
+ logger.warning(f"Missing or invalid dimension data for {source_col_name}, Customer ID {customer_id}")
151
  # Optionally fill with a default value or leave blank
152
  # new_sheet[f'D{target_row}'] = "N/A"
153
  except KeyError:
154
+ logger.warning(f"Column '{source_col_name}' not found in dimension_df for Customer ID {customer_id}")
155
  except Exception as e:
156
+ logger.error(f"Error filling dimension {source_col_name} for Customer ID {customer_id}: {e}")
157
 
158
  # 保持"批准人:"文本,并在其后添加名字
159
  new_sheet['D29'] = f"批准人:{quality_assured_by}"
160
+ logger.debug(f"Finished processing data for Customer ID: {customer_id}") # Use debug
161
 
162
  # Remove the original template sheet if it exists and wasn't intended to be kept
163
  if 'WACKER' in wb.sheetnames:
164
+ logger.info("Removing 'WACKER' template sheet from the output workbook.")
165
  del wb['WACKER'] # Remove template if no longer needed
166
 
167
  # Create a temporary file path for the output
 
175
  # Let's use a predictable name within the temp dir, might be easier for Gradio/platform
176
  temp_output_path = os.path.join(temp_dir, safe_output_filename)
177
 
178
+ logger.info(f"Attempting to save report to temporary path: {temp_output_path}")
179
  wb.save(temp_output_path)
180
+ logger.info(f"Successfully saved report to: {temp_output_path}")
181
  return temp_output_path # Return the full path to the temporary file
182
  except Exception as save_error:
183
+ logger.exception(f"Error saving workbook to temporary path {temp_output_path}") # Log exception
 
 
184
  return None
185
 
186
  except FileNotFoundError:
187
+ logger.error(f"Error: Input file not found. Check paths: {file1_path}, {file2_path}")
188
  return None
189
  except KeyError as e:
190
+ logger.exception(f"Error: Missing expected column or sheet name: {e}. Check input file formats.")
191
  return None
192
  except Exception as e:
193
  # Log other unexpected errors
194
+ logger.exception(f"An unexpected error occurred in process_files: {e}")
 
 
195
  return None
196
 
197
 
198
  # Keep the original script behavior if run directly (optional)
199
  if __name__ == "__main__":
200
+ # Configure basic logging for direct script execution if needed
201
+ # Note: app.py usually handles the main config when run via Gradio
202
+ if not logging.getLogger().hasHandlers(): # Only configure if not already configured by app.py import
203
+ log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
204
+ logging.basicConfig(level=logging.INFO, format=log_format)
205
+
206
  # Define default input/output files for direct execution
207
  default_file1 = '1.xls'
208
  default_file2 = '2.xlsx'
209
  default_output = '2_updated.xlsx'
210
 
211
  print(f"Running script directly. Processing {default_file1} and {default_file2}...")
212
+ # Use logger here too if desired, but print might be fine for direct runs
213
+ logger.info(f"Running script directly. Processing {default_file1} and {default_file2}...")
214
  output_path = process_files(default_file1, default_file2, default_output)
215
 
216
  if output_path:
217
  print(f"Report generated successfully: {output_path}")
218
+ logger.info(f"Direct run: Report generated successfully: {output_path}")
219
  else:
220
+ print("Report generation failed.")
221
+ logger.error("Direct run: Report generation failed.")