ChatCausalGPT committed on
Commit
846aae3
·
1 Parent(s): 60946a8
Files changed (3) hide show
  1. app.py +76 -0
  2. main.py +178 -108
  3. requirements.txt +2 -1
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ # Removed imports: pandas, openpyxl, datetime, timedelta, os, tempfile
4
+
5
+ # Import the processing function from main.py
6
+ from main import process_files
7
+
8
def generate_report(file1_obj, file2_obj):
    """
    Gradio wrapper: validate the two uploaded files and run the report builder.

    Args:
        file1_obj: Gradio File object for the data-source workbook (like 1.xls).
        file2_obj: Gradio File object for the template workbook (like 2.xlsx).

    Returns:
        str: Path to the generated output Excel file.

    Raises:
        gr.Error: If either file is missing or processing fails.
    """
    if file1_obj is None or file2_obj is None:
        raise gr.Error("Please upload both required files.")

    try:
        # Gradio stores each upload in a temp file; .name is its filesystem path.
        file1_path = file1_obj.name
        file2_path = file2_obj.name

        # Define the output filename (can be customized if needed)
        output_filename = "generated_report.xlsx"

        print(f"Processing files: {file1_path}, {file2_path}")  # Log input paths
        result_path = process_files(file1_path, file2_path, output_filename)
        print(f"process_files returned: {result_path}")  # Log result path

        if result_path:
            # Return the path of the generated file for Gradio to serve.
            return result_path
        # process_files signals failure by returning None.
        raise gr.Error("Failed to generate the report. Check logs or input files.")
    except gr.Error:
        # Bug fix: the generic handler below used to swallow this deliberate
        # gr.Error and re-wrap it as "An unexpected error occurred", hiding the
        # intended message from the user. Propagate it unchanged instead.
        raise
    except Exception as e:
        # Catch any other unexpected errors during the wrapper execution.
        import traceback
        print(f"Error in Gradio wrapper (generate_report): {e}")
        print(traceback.format_exc())
        raise gr.Error(f"An unexpected error occurred: {e}")
50
+
51
+
52
# --- Gradio UI definition: two file uploads in, one downloadable report out ---
inputs = [
    gr.File(label="上传数据源文件 (类似 1.xls)"),
    gr.File(label="上传模板文件 (类似 2.xlsx)"),
]
outputs = gr.File(label="下载生成的报告")

title = "Quality Inspection Report Generator"
description = (
    "Upload the data source file and template file to generate the "
    "combined quality inspection report."
)

# Interface wiring; flagging is disabled since reports are one-shot downloads.
demo = gr.Interface(
    fn=generate_report,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    allow_flagging='never',
)

if __name__ == "__main__":
    # 0.0.0.0 makes the app reachable from outside the container;
    # the port stays at Gradio's default, 7860.
    demo.launch(server_name="0.0.0.0")
main.py CHANGED
@@ -1,113 +1,183 @@
1
  import pandas as pd
2
  from openpyxl import load_workbook
3
  from datetime import datetime, timedelta
 
4
 
5
- # 读取第一个文件
6
- file1 = '1.xls'
7
- header_df = pd.read_excel(file1, sheet_name='HEADER')
8
-
9
- # 读取Dimension表,跳过前12行,然后使用第13行作为列名
10
- dimension_df = pd.read_excel(file1, sheet_name='Dimension', skiprows=12)
11
- # 使用第一行作为列名
12
- dimension_df.columns = dimension_df.iloc[0]
13
- # 删除第一行(现在已经作为列名)并重置索引
14
- dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
15
-
16
- # 读取Sand表的数据
17
- sand_df = pd.read_excel(file1, sheet_name='Sand', header=None)
18
-
19
- # 读取第二个文件
20
- file2 = '2.xlsx'
21
- wb = load_workbook(file2)
22
- wacker_sheet = wb['WACKER']
23
-
24
- # 获取Sales Order Quantity和Quality Assured By
25
- sales_order_quantity = header_df.iloc[5, 2] # Sales Order Quantity位置
26
- quality_assured_by = header_df.iloc[3, 7] # Quality Assured By在第4行最后一列
27
-
28
- # 定义元素和行号的对应关系
29
- element_row_mapping = {
30
- 'Al': 9, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Al
31
- 'Ca': 10, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Ca
32
- 'Cu': 11, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Cu
33
- 'Fe': 12, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Fe
34
- 'K': 13, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_K
35
- 'Li': 14, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Li
36
- 'Mg': 15, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Mg
37
- 'Mn': 16, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Mn
38
- 'Na': 17, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Na
39
- 'Ti': 18, # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Ti
40
- 'Zr': 19 # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Zr
41
- }
42
-
43
- # 定义元素在Sand表中的列索引
44
- element_col_mapping = {
45
- 'Al': 4, # 第5列
46
- 'Ca': 5, # 第6列
47
- 'Cu': 6, # 第7列
48
- 'Fe': 7, # 第8列
49
- 'K': 8, # 第9列
50
- 'Li': 9, # 第10列
51
- 'Mg': 10, # 第11列
52
- 'Mn': 11, # 第12列
53
- 'Na': 12, # 第13列
54
- 'Ti': 13, # 第14列
55
- 'Zr': 14 # 第15列
56
- }
57
-
58
- # 遍历Dimension表格中的每个Customer ID
59
- for index, row in dimension_df.iterrows():
60
- customer_id = row['Customer ID'] # 现在这个列名应该是正确的了
61
- inspection_date = pd.to_datetime(row['Inspection Date']).strftime('%Y-%m-%d') # 格式化日期
62
-
63
- # 创建新的工作表
64
- new_sheet = wb.create_sheet(title=str(customer_id))
65
-
66
- # 复制WACKER表格的内容到新工作表(这样会保持原有的客户名称)
67
- for row_wacker in wacker_sheet.iter_rows(values_only=True):
68
- new_sheet.append(row_wacker)
69
-
70
- # 填充数据(不再覆盖客户名称)
71
- new_sheet['B3'] = str(sales_order_quantity) + ' PCS' # Number+Unit/数量+单位
72
- new_sheet['B4'] = customer_id # Batch reference/批号
73
- new_sheet['D4'] = inspection_date # Date of issue/报告日期
74
- new_sheet['B5'] = inspection_date # Production date/生产日期
75
- new_sheet['D5'] = (datetime.strptime(inspection_date, '%Y-%m-%d') + timedelta(days=730)).strftime('%Y-%m-%d') # Expiring date/失效日期
76
-
77
- # 从sand表中获取当前customer_id的数据
78
- sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
79
- if not sand_rows.empty:
80
- sand_row = sand_rows.iloc[0]
81
 
82
- # 填充元素数据
83
- for element, target_row in element_row_mapping.items():
84
- source_col = element_col_mapping[element]
85
- new_sheet[f'D{target_row}'] = sand_row[source_col]
86
-
87
- # 填充Analysis result/分析结果
88
- # 保持原有的测试项目名称,只更新分析结果列
89
- for i in range(20, 29):
90
- if i == 20:
91
- new_sheet[f'D{i}'] = row['OD1'] # 外径1
92
- elif i == 21:
93
- new_sheet[f'D{i}'] = row['OD2'] # 外径2
94
- elif i == 22:
95
- new_sheet[f'D{i}'] = row['OD3'] # 外径3
96
- elif i == 23:
97
- new_sheet[f'D{i}'] = row['Height'] # 高度
98
- elif i == 24:
99
- new_sheet[f'D{i}'] = row['Wall11'] # 壁厚11
100
- elif i == 25:
101
- new_sheet[f'D{i}'] = row['Wall12'] # 壁厚12
102
- elif i == 26:
103
- new_sheet[f'D{i}'] = row['Wall13'] # 壁厚13
104
- elif i == 27:
105
- new_sheet[f'D{i}'] = row['Wall2'] # 壁厚2
106
- elif i == 28:
107
- new_sheet[f'D{i}'] = row['Wall3'] # 壁厚3
108
-
109
- # 保持"批准人:"文本,并在其后添加名字
110
- new_sheet['D29'] = f"批准人:{quality_assured_by}"
111
-
112
- # 保存修改后的文件
113
- wb.save('2_updated.xlsx')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
  from openpyxl import load_workbook
3
  from datetime import datetime, timedelta
4
+ import os # Added for path manipulation
5
 
6
+ def process_files(file1_path, file2_path, output_filename="generated_report.xlsx"):
7
+ """
8
+ Processes two input Excel files and generates a combined report.
9
+
10
+ Args:
11
+ file1_path (str): Path to the first input Excel file (data source).
12
+ file2_path (str): Path to the second input Excel file (template).
13
+ output_filename (str): Desired name for the output report file.
14
+
15
+ Returns:
16
+ str: The path to the generated output Excel file.
17
+ Returns None if an error occurs during processing.
18
+ """
19
+ try:
20
+ # 读取第一个文件
21
+ header_df = pd.read_excel(file1_path, sheet_name='HEADER')
22
+ dimension_df = pd.read_excel(file1_path, sheet_name='Dimension', skiprows=12)
23
+ dimension_df.columns = dimension_df.iloc[0]
24
+ dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
25
+ sand_df = pd.read_excel(file1_path, sheet_name='Sand', header=None)
26
+
27
+ # 读取第二个文件
28
+ wb = load_workbook(file2_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ # Check if 'WACKER' sheet exists
31
+ if 'WACKER' not in wb.sheetnames:
32
+ print("Error: Template file must contain a sheet named 'WACKER'.")
33
+ return None # Indicate error
34
+ wacker_sheet = wb['WACKER']
35
+
36
+
37
+ # 获取Sales Order Quantity和Quality Assured By
38
+ sales_order_quantity = header_df.iloc[5, 2]
39
+ quality_assured_by = header_df.iloc[3, 7]
40
+
41
+ # 定义元素和行号的对应关系 (Copied from original script)
42
+ element_row_mapping = {
43
+ 'Al': 9, 'Ca': 10, 'Cu': 11, 'Fe': 12, 'K': 13, 'Li': 14,
44
+ 'Mg': 15, 'Mn': 16, 'Na': 17, 'Ti': 18, 'Zr': 19
45
+ }
46
+ element_col_mapping = {
47
+ 'Al': 4, 'Ca': 5, 'Cu': 6, 'Fe': 7, 'K': 8, 'Li': 9,
48
+ 'Mg': 10, 'Mn': 11, 'Na': 12, 'Ti': 13, 'Zr': 14
49
+ }
50
+
51
+
52
+ # 遍历Dimension表格中的每个Customer ID
53
+ for index, row in dimension_df.iterrows():
54
+ customer_id = row['Customer ID']
55
+ # Ensure customer_id is a valid sheet name (Excel has limitations)
56
+ safe_customer_id = str(customer_id).replace('/', '-').replace('\\', '-').replace('?', '').replace('*', '').replace('[', '').replace(']', '')
57
+ safe_customer_id = safe_customer_id[:31] # Max sheet name length
58
+
59
+ # Handle potential NaN or empty Customer ID
60
+ if pd.isna(customer_id) or not str(customer_id).strip():
61
+ print(f"Skipping row {index+14} due to missing or invalid Customer ID.") # +14 accounts for header rows skipped
62
+ continue
63
+
64
+ inspection_date_str = ""
65
+ inspection_date = None # Initialize inspection_date
66
+ try:
67
+ # Check if inspection_date is already datetime or needs conversion
68
+ if isinstance(row['Inspection Date'], datetime):
69
+ inspection_date = row['Inspection Date']
70
+ else:
71
+ inspection_date = pd.to_datetime(row['Inspection Date'])
72
+ inspection_date_str = inspection_date.strftime('%Y-%m-%d')
73
+ except Exception as e:
74
+ print(f"Warning: Could not parse Inspection Date for Customer ID {customer_id}: {e}. Skipping date fields.")
75
+ # inspection_date remains None
76
+
77
+
78
+ new_sheet_title = safe_customer_id
79
+ # Avoid duplicate sheet names if safe_customer_id becomes the same for different original IDs
80
+ sheet_count = 1
81
+ while new_sheet_title in wb.sheetnames:
82
+ suffix = f"_{sheet_count}"
83
+ max_len = 31 - len(suffix)
84
+ new_sheet_title = safe_customer_id[:max_len] + suffix
85
+ sheet_count += 1
86
+
87
+ new_sheet = wb.create_sheet(title=new_sheet_title)
88
+
89
+ # 复制WACKER表格的内容到新工作表
90
+ for row_wacker in wacker_sheet.iter_rows(values_only=True):
91
+ new_sheet.append(row_wacker)
92
+
93
+ # 填充数据
94
+ new_sheet['B3'] = str(sales_order_quantity) + ' PCS'
95
+ new_sheet['B4'] = customer_id # Use original ID here
96
+ if inspection_date: # Only fill dates if parsing was successful
97
+ new_sheet['D4'] = inspection_date_str
98
+ new_sheet['B5'] = inspection_date_str
99
+ new_sheet['D5'] = (inspection_date + timedelta(days=730)).strftime('%Y-%m-%d')
100
+
101
+
102
+ # 从sand表中获取当前customer_id的数据
103
+ sand_rows = sand_df[sand_df[2] == customer_id] # 使用第3列(索引2)作为Crucible ID
104
+ if not sand_rows.empty:
105
+ sand_row = sand_rows.iloc[0]
106
+ # 填充元素数据 (with added error handling)
107
+ for element, target_row in element_row_mapping.items():
108
+ try:
109
+ source_col = element_col_mapping[element]
110
+ # Check if value exists and handle potential errors
111
+ value = sand_row.get(source_col) # Use .get for safety
112
+ if value is not None and not pd.isna(value):
113
+ new_sheet[f'D{target_row}'] = value
114
+ else:
115
+ print(f"Warning: Missing or invalid sand data for {element}, Customer ID {customer_id}, Col Index {source_col}")
116
+ # Optionally fill with a default value or leave blank
117
+ # new_sheet[f'D{target_row}'] = "N/A"
118
+ except KeyError:
119
+ print(f"Warning: Column index {source_col} not found in sand_row for {element}, Customer ID {customer_id}")
120
+ except Exception as e:
121
+ print(f"Error filling element {element} for Customer ID {customer_id}: {e}")
122
+
123
+
124
+ # 填充Analysis result/分析结果 (with added error handling)
125
+ dim_mapping = {
126
+ 20: 'OD1', 21: 'OD2', 22: 'OD3', 23: 'Height',
127
+ 24: 'Wall11', 25: 'Wall12', 26: 'Wall13',
128
+ 27: 'Wall2', 28: 'Wall3'
129
+ }
130
+ for target_row, source_col_name in dim_mapping.items():
131
+ try:
132
+ # Check if value exists and handle potential errors
133
+ value = row.get(source_col_name) # Use .get for safety
134
+ if value is not None and not pd.isna(value):
135
+ new_sheet[f'D{target_row}'] = value
136
+ else:
137
+ print(f"Warning: Missing or invalid dimension data for {source_col_name}, Customer ID {customer_id}")
138
+ # Optionally fill with a default value or leave blank
139
+ # new_sheet[f'D{target_row}'] = "N/A"
140
+ except KeyError:
141
+ print(f"Warning: Column '{source_col_name}' not found in dimension_df for Customer ID {customer_id}")
142
+ except Exception as e:
143
+ print(f"Error filling dimension {source_col_name} for Customer ID {customer_id}: {e}")
144
+
145
+ # 保持"批准人:"文本,并在其后添加名字
146
+ new_sheet['D29'] = f"批准人:{quality_assured_by}"
147
+
148
+ # Remove the original template sheet if it exists and wasn't intended to be kept
149
+ if 'WACKER' in wb.sheetnames:
150
+ del wb['WACKER'] # Remove template if no longer needed
151
+
152
+ # 保存修改后的文件
153
+ wb.save(output_filename)
154
+ return output_filename # Return the path of the saved file
155
+
156
+ except FileNotFoundError:
157
+ print(f"Error: Input file not found. Check paths: {file1_path}, {file2_path}")
158
+ return None
159
+ except KeyError as e:
160
+ print(f"Error: Missing expected column or sheet name: {e}. Check input file formats.")
161
+ return None
162
+ except Exception as e:
163
+ # Log other unexpected errors
164
+ import traceback
165
+ print(f"An unexpected error occurred in process_files: {e}")
166
+ print(traceback.format_exc())
167
+ return None
168
+
169
+
170
# Standalone entry point: run against the historical default file names when
# the module is executed directly instead of being imported by app.py.
if __name__ == "__main__":
    src, template, report = '1.xls', '2.xlsx', '2_updated.xlsx'

    print(f"Running script directly. Processing {src} and {template}...")
    result = process_files(src, template, report)

    # Same messages as before; chosen by whether a path came back.
    print(f"Report generated successfully: {result}" if result
          else "Report generation failed.")
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  pandas>=1.3.0
2
- openpyxl>=3.0.0
 
 
1
  pandas>=1.3.0
2
+ openpyxl>=3.0.0
3
+ gradio>=3.0