Spaces:

ChatCausalGPT
/

test

Configuration error

App Files Files Community

ChatCausalGPT committed on Jan 26, 2025

Commit

76b7572

0 Parent(s):

Initial commit: Quality Inspection Report Generator

Browse files

Files changed (9) hide show

.gitignore +38 -0
LICENSE +21 -0
README.md +80 -0
check_template.py +34 -0
generate_quality_inspection_reports.py +113 -0
quality_inspection_documentation.py +109 -0
quick.py +61 -0
requirements.txt +2 -0
test_structure.py +38 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,38 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Excel files (since they contain data)
+*.xls
+*.xlsx
+*.xlsm
+*.xlsb
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+# OS
+.DS_Store
+Thumbs.db

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2024 Quality Inspection Report Generator
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md ADDED Viewed

	@@ -0,0 +1,80 @@

+# Quality Inspection Report Generator
+一个用于从 Excel 数据生成质量检验报告的 Python 工具。
+## 功能特点
+- 从源数据 Excel 文件自动生成质量检验报告
+- 支持多个客户的批量处理
+- 保持模板格式和样式
+- 自动计算日期和有效期
+- 完整的化学元素和尺寸数据处理
+## 系统要求
+- Python 3.7.1+（pandas 1.3.0 及以上版本的最低要求）
+- pandas
+- openpyxl
+- xlrd（pandas 读取 `.xls` 源文件时需要；未包含在 requirements.txt 中，需另行安装）
+## 安装
+1. 克隆仓库：
+```bash
+git clone https://github.com/1587causalai/quality-inspection-report-generator.git
+cd quality-inspection-report-generator
+```
+2. 安装依赖：
+```bash
+pip install -r requirements.txt
+```
+## 使用方法
+1. 准备输入文件：
+   - `1.xls`：源数据文件，包含 HEADER、Dimension 和 Sand 工作表
+   - `2.xlsx`：模板文件，包含 WACKER 工作表
+2. 运行脚本：
+```bash
+python generate_quality_inspection_reports.py
+```
+3. 检查输出：
+   - 生成的报告将保存为 `2_updated.xlsx`
+   - 每个客户的数据将保存在单独的工作表中
+## 数据格式要求
+### 输入文件结构
+1. `1.xls` 包含：
+   - HEADER：基本信息（数量、批准人等）
+   - Dimension：尺寸数据和检验日期
+   - Sand：化学元素测试数据
+2. `2.xlsx` 包含：
+   - WACKER：报告模板格式
+### 输出报告格式
+- 基本信息（B3-D5）：数量、批号、日期等
+- 化学元素数据（D9-D19）：11种元素的测试结果
+- 尺寸数据（D20-D28）：外径、高度、壁厚等
+- 批准信息（D29）：批准人姓名
+## 文档
+详细的代码文档请参考 `quality_inspection_documentation.py`。
+## 许可证
+MIT License
+## 作者
+[Your Name]
+## 贡献
+欢迎提交 Issue 和 Pull Request！

check_template.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from openpyxl import load_workbook
+import pandas as pd
+def print_sheet_content(file_path, sheet_name):
+    wb = load_workbook(file_path)
+    sheet = wb[sheet_name]
+    print(f"\nContent of sheet '{sheet_name}':")
+    for i, row in enumerate(sheet.rows, 1):
+        values = [str(cell.value) if cell.value is not None else '' for cell in row]
+        print(f"Row {i}: {values}")
+# Exploratory script: dumps the template sheet and the raw 'Sand' sheet so the
+# layout of both workbooks can be inspected by eye.
+# Print the content of the template file
+print_sheet_content('2.xlsx', 'WACKER')
+# Read the Sand worksheet of the first file
+print("\n原始数据结构：")
+file1 = '1.xls'
+df = pd.read_excel(file1, sheet_name='Sand')
+print("\n列名：")
+print(df.columns.tolist())
+# Print the first few rows to look at the structure
+print("\n前几行数据：")
+print(df.head())
+# Read the same sheet again, this time skipping the first 12 rows
+print("\n跳过12行的数据：")
+sand_df_skipped = pd.read_excel(file1, sheet_name='Sand', skiprows=12)
+print(sand_df_skipped.head())
+# Print all column names of the skipped-rows frame
+print("\n跳过12行后的列名：")
+print(sand_df_skipped.columns.tolist())

generate_quality_inspection_reports.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import pandas as pd
+from openpyxl import load_workbook
+from datetime import datetime, timedelta
+# 读取第一个文件
+file1 = '1.xls'
+header_df = pd.read_excel(file1, sheet_name='HEADER')
+# 读取Dimension表，跳过前12行，然后使用第13行作为列名
+dimension_df = pd.read_excel(file1, sheet_name='Dimension', skiprows=12)
+# 使用第一行作为列名
+dimension_df.columns = dimension_df.iloc[0]
+# 删除第一行（现在已经作为列名）并重置索引
+dimension_df = dimension_df.iloc[1:].reset_index(drop=True)
+# 读取Sand表的数据
+sand_df = pd.read_excel(file1, sheet_name='Sand', header=None)
+# 读取第二个文件
+file2 = '2.xlsx'
+wb = load_workbook(file2)
+wacker_sheet = wb['WACKER']
+# 获取Sales Order Quantity和Quality Assured By
+sales_order_quantity = header_df.iloc[5, 2]  # Sales Order Quantity位置
+quality_assured_by = header_df.iloc[3, 7]    # Quality Assured By在第4行最后一列
+# 定义元素和行号的对应关系
+element_row_mapping = {
+    'Al': 9,   # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Al
+    'Ca': 10,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Ca
+    'Cu': 11,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Cu
+    'Fe': 12,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Fe
+    'K': 13,   # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_K
+    'Li': 14,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Li
+    'Mg': 15,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Mg
+    'Mn': 16,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Mn
+    'Na': 17,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Na
+    'Ti': 18,  # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Ti
+    'Zr': 19   # 硅柏_石英坩埚_QC530HS_201410_V3B-CN_Zr
+}
+# 定义元素在Sand表中的列索引
+element_col_mapping = {
+    'Al': 4,   # 第5列
+    'Ca': 5,   # 第6列
+    'Cu': 6,   # 第7列
+    'Fe': 7,   # 第8列
+    'K': 8,    # 第9列
+    'Li': 9,   # 第10列
+    'Mg': 10,  # 第11列
+    'Mn': 11,  # 第12列
+    'Na': 12,  # 第13列
+    'Ti': 13,  # 第14列
+    'Zr': 14   # 第15列
+}
+# 遍历Dimension表格中的每个Customer ID
+for index, row in dimension_df.iterrows():
+    customer_id = row['Customer ID']  # 现在这个列名应该是正确的了
+    inspection_date = pd.to_datetime(row['Inspection Date']).strftime('%Y-%m-%d')  # 格式化日期
+    # 创建新的工作表
+    new_sheet = wb.create_sheet(title=str(customer_id))
+    # 复制WACKER表格的内容到新工作表（这样会保持原有的客户名称）
+    for row_wacker in wacker_sheet.iter_rows(values_only=True):
+        new_sheet.append(row_wacker)
+    # 填充数据（不再覆盖客户名称）
+    new_sheet['B3'] = str(sales_order_quantity) + ' PCS'  # Number+Unit/数量+单位
+    new_sheet['B4'] = customer_id  # Batch reference/批号
+    new_sheet['D4'] = inspection_date  # Date of issue/报告日期
+    new_sheet['B5'] = inspection_date  # Production date/生产日期
+    new_sheet['D5'] = (datetime.strptime(inspection_date, '%Y-%m-%d') + timedelta(days=730)).strftime('%Y-%m-%d')  # Expiring date/失效日期
+    # 从sand表中获取当前customer_id的数据
+    sand_rows = sand_df[sand_df[2] == customer_id]  # 使用第3列（索引2）作为Crucible ID
+    if not sand_rows.empty:
+        sand_row = sand_rows.iloc[0]
+        # 填充元素数据
+        for element, target_row in element_row_mapping.items():
+            source_col = element_col_mapping[element]
+            new_sheet[f'D{target_row}'] = sand_row[source_col]
+    # 填充Analysis result/分析结果
+    # 保持原有的测试项目名称，只更新分析结果列
+    for i in range(20, 29):
+        if i == 20:
+            new_sheet[f'D{i}'] = row['OD1']  # 外径1
+        elif i == 21:
+            new_sheet[f'D{i}'] = row['OD2']  # 外径2
+        elif i == 22:
+            new_sheet[f'D{i}'] = row['OD3']  # 外径3
+        elif i == 23:
+            new_sheet[f'D{i}'] = row['Height']  # 高度
+        elif i == 24:
+            new_sheet[f'D{i}'] = row['Wall11']  # 壁厚11
+        elif i == 25:
+            new_sheet[f'D{i}'] = row['Wall12']  # 壁厚12
+        elif i == 26:
+            new_sheet[f'D{i}'] = row['Wall13']  # 壁厚13
+        elif i == 27:
+            new_sheet[f'D{i}'] = row['Wall2']  # 壁厚2
+        elif i == 28:
+            new_sheet[f'D{i}'] = row['Wall3']  # 壁厚3
+    # 保持"批准人："文本，并在其后添加名字
+    new_sheet['D29'] = f"批准人：{quality_assured_by}"
+# 保存修改后的文件
+wb.save('2_updated.xlsx')

quality_inspection_documentation.py ADDED Viewed

	@@ -0,0 +1,109 @@

+"""
+质量检验报告生成脚本说明文档
+============================
+本文档详细说明了 generate_quality_inspection_reports.py 的工作原理和数据处理流程。
+输入文件
+--------
+1. 1.xls - 源数据文件，包含以下工作表：
+   - HEADER: 包含基本信息如数量和批准人
+   - Dimension: 包含尺寸数据和检验日期
+   - Sand: 包含化学元素测试数据
+2. 2.xlsx - 模板文件，包含：
+   - WACKER: 模板工作表，包含预设格式
+数据结构
+--------
+1. Sand表结构（跳过前12行后）：
+   - 第3列 (索引2): Crucible ID（客户ID）
+   - 第5-15列 (索引4-14): 化学元素测试数据
+     * Al: 第5列 (索引4)
+     * Ca: 第6列 (索引5)
+     * Cu: 第7列 (索引6)
+     * Fe: 第8列 (索引7)
+     * K:  第9列 (索引8)
+     * Li: 第10列 (索引9)
+     * Mg: 第11列 (索引10)
+     * Mn: 第12列 (索引11)
+     * Na: 第13列 (索引12)
+     * Ti: 第14列 (索引13)
+     * Zr: 第15列 (索引14)
+2. 输出报告结构：
+   - 基本信息（B3-D5）：数量、批号、日期等
+   - 化学元素数据（D9-D19）：对应11种元素的测试结果
+   - 尺寸数据（D20-D28）：外径、高度、壁厚等
+   - 批准信息（D29）：批准人姓名
+处理流程
+--------
+1. 数据读取阶段：
+   - 读取HEADER表获取基本信息
+   - 读取Dimension表获取尺寸数据和日期信息
+   - 读取Sand表获取化学元素测试数据
+   - 读取模板文件作为基础格式
+2. 数据处理阶段：
+   - 遍历Dimension表中的每个Customer ID
+   - 为每个Customer ID创建新的工作表
+   - 复制模板内容保持格式一致
+   - 填充基本信息（数量、日期等）
+   - 从Sand表匹配并填充化学元素数据
+   - 填充尺寸数据
+   - 添加批准人信息
+3. 数据映射关系：
+   a. 元素行号映射 (element_row_mapping):
+      - Al -> 第9行
+      - Ca -> 第10行
+      - Cu -> 第11行
+      ...（依此类推）
+   b. 元素列索引映射 (element_col_mapping):
+      - Al -> 第5列（索引4）
+      - Ca -> 第6列（索引5）
+      - Cu -> 第7列（索引6）
+      ...（依此类推）
+注意事项
+--------
+1. 日期处理：
+   - 检验日期格式化为 YYYY-MM-DD
+   - 失效日期自动计算为检验日期+730天（2年）
+2. 数据验证：
+   - 检查Customer ID是否存在于Sand表中
+   - 确保所有必要的数据都被正确填充
+3. 格式保持：
+   - 通过复制模板内容保持原有格式
+   - 保持"批准人："文本的一致性
+使用方法
+--------
+1. 确保输入文件 (1.xls 和 2.xlsx) 在正确的位置
+2. 运行脚本：python generate_quality_inspection_reports.py
+3. 输出文件将保存为 2_updated.xlsx
+输出文件
+--------
+2_updated.xlsx：包含所有客户的质量检验报告，每个客户一个工作表，
+保持了原有的格式并填充了所有必要的数据。
+"""
+# 示例：如何使用生成的报告
+def usage_example():
+    print("使用示例：")
+    print("1. 准备输入文件：")
+    print("   - 确保有1.xls（源数据）")
+    print("   - 确保有2.xlsx（模板）")
+    print("\n2. 运行脚本：")
+    print("   python generate_quality_inspection_reports.py")
+    print("\n3. 检查输出：")
+    print("   - 查看2_updated.xlsx")
+    print("   - 验证每个客户的工作表是否正确生成")
+# Print the usage walkthrough only when this file is executed as a script.
+if __name__ == "__main__":
+    usage_example()

quick.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import pandas as pd
+def extract_sand_data(input_file, output_file):
+    try:
+        # 读取Excel文件的Sand工作表
+        xls = pd.ExcelFile(input_file)
+        if 'Sand' not in xls.sheet_names:
+            raise ValueError("Excel文件中缺少'Sand'工作表")
+        # 读取Sand工作表
+        df = pd.read_excel(xls, sheet_name='Sand', header=None)
+        # 定义数据起始行和列索引
+        start_row = 13  # 数据从第11行开始（0-based索引）
+        crucible_id_col = 2  # Crucible ID在C列（0-based索引）
+        element_start_col = 4  # 元素数据从第5列开始（0-based索引）
+        element_end_col = 14  # 元素数据到第15列结束（0-based索引）
+        # 提取数据
+        results = []
+        for row_idx in range(start_row, len(df)):
+            row = df.iloc[row_idx]
+            if pd.isna(row[crucible_id_col]):  # 跳过空行
+                continue
+            # 提取Crucible ID
+            crucible_id = str(row[crucible_id_col]).strip()
+            # 提取元素数据
+            elements = {
+                "Crucible ID": crucible_id,
+                "Al": row[element_start_col],
+                "Ca": row[element_start_col + 1],
+                "Cu": row[element_start_col + 2],
+                "Fe": row[element_start_col + 3],
+                "K": row[element_start_col + 4],
+                "Li": row[element_start_col + 5],
+                "Mg": row[element_start_col + 6],
+                "Mn": row[element_start_col + 7],
+                "Na": row[element_start_col + 8],
+                "Ti": row[element_start_col + 9],
+                "Zr": row[element_start_col + 10],
+            }
+            results.append(elements)
+        # 将结果转换为DataFrame
+        result_df = pd.DataFrame(results)
+        # 保存到新的Excel文件
+        result_df.to_excel(output_file, index=False)
+        return f"数据已成功保存到 {output_file}"
+    except Exception as e:
+        return f"处理错误: {str(e)}"
+# Example usage
+if __name__ == "__main__":
+    input_file = "1.xls"  # input file path
+    output_file = "output.xlsx"  # output file path
+    result = extract_sand_data(input_file, output_file)
+    print(result)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ pandas>=1.3.0
2	+ openpyxl>=3.0.0

test_structure.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import pandas as pd
+def analyze_excel_structure(file_path):
+    """分析Excel文件的基本结构"""
+    print(f"\n开始分析文件: {file_path}")
+    # 读取Excel文件
+    xls = pd.ExcelFile(file_path)
+    # 打印所有sheet名称
+    print("\n所有Sheet页:")
+    for sheet_name in xls.sheet_names:
+        print(f"- {sheet_name}")
+    # 分析每个sheet的结构
+    for sheet_name in xls.sheet_names:
+        print(f"\n\n分析 Sheet [{sheet_name}]:")
+        # 尝试不同的skiprows值来找到正确的数据结构
+        for skip_rows in [0, 6, 12]:
+            print(f"\n跳过 {skip_rows} 行后的结构:")
+            try:
+                df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=skip_rows)
+                print("\n列名:")
+                print(df.columns.tolist())
+                print("\n前3行数据:")
+                print(df.head(3))
+                # 打印非空行数
+                non_empty_rows = len(df.dropna(how='all'))
+                print(f"\n非空行数: {non_empty_rows}")
+            except Exception as e:
+                print(f"读取出错: {str(e)}")
+# Script entry point: analyze the workbook named by INPUT_FILE.
+if __name__ == "__main__":
+    INPUT_FILE = "1.xls"
+    analyze_excel_structure(INPUT_FILE)