Spaces:
Configuration error
Configuration error
Commit
·
5639ab4
1
Parent(s):
76b7572
chore: remove unnecessary files
Browse files
- check_template.py +0 -34
- quick.py +0 -61
- test_structure.py +0 -38
check_template.py
DELETED
|
@@ -1,34 +0,0 @@
|
|
| 1 |
-
from openpyxl import load_workbook
|
| 2 |
-
import pandas as pd
|
| 3 |
-
|
| 4 |
-
def print_sheet_content(file_path, sheet_name):
|
| 5 |
-
wb = load_workbook(file_path)
|
| 6 |
-
sheet = wb[sheet_name]
|
| 7 |
-
|
| 8 |
-
print(f"\nContent of sheet '{sheet_name}':")
|
| 9 |
-
for i, row in enumerate(sheet.rows, 1):
|
| 10 |
-
values = [str(cell.value) if cell.value is not None else '' for cell in row]
|
| 11 |
-
print(f"Row {i}: {values}")
|
| 12 |
-
|
| 13 |
-
# 打印模板文件的内容
|
| 14 |
-
print_sheet_content('2.xlsx', 'WACKER')
|
| 15 |
-
|
| 16 |
-
# 读取第一个文件的Sand工作表
|
| 17 |
-
print("\n原始数据结构:")
|
| 18 |
-
file1 = '1.xls'
|
| 19 |
-
df = pd.read_excel(file1, sheet_name='Sand')
|
| 20 |
-
print("\n列名:")
|
| 21 |
-
print(df.columns.tolist())
|
| 22 |
-
|
| 23 |
-
# 打印前几行数据以查看结构
|
| 24 |
-
print("\n前几行数据:")
|
| 25 |
-
print(df.head())
|
| 26 |
-
|
| 27 |
-
# 读取跳过12行的数据
|
| 28 |
-
print("\n跳过12行的数据:")
|
| 29 |
-
sand_df_skipped = pd.read_excel(file1, sheet_name='Sand', skiprows=12)
|
| 30 |
-
print(sand_df_skipped.head())
|
| 31 |
-
|
| 32 |
-
# 打印所有列名
|
| 33 |
-
print("\n跳过12行后的列名:")
|
| 34 |
-
print(sand_df_skipped.columns.tolist())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
quick.py
DELETED
|
@@ -1,61 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
|
| 3 |
-
def extract_sand_data(input_file, output_file):
|
| 4 |
-
try:
|
| 5 |
-
# 读取Excel文件的Sand工作表
|
| 6 |
-
xls = pd.ExcelFile(input_file)
|
| 7 |
-
if 'Sand' not in xls.sheet_names:
|
| 8 |
-
raise ValueError("Excel文件中缺少'Sand'工作表")
|
| 9 |
-
|
| 10 |
-
# 读取Sand工作表
|
| 11 |
-
df = pd.read_excel(xls, sheet_name='Sand', header=None)
|
| 12 |
-
|
| 13 |
-
# 定义数据起始行和列索引
|
| 14 |
-
start_row = 13 # 数据从第11行开始(0-based索引)
|
| 15 |
-
crucible_id_col = 2 # Crucible ID在C列(0-based索引)
|
| 16 |
-
element_start_col = 4 # 元素数据从第5列开始(0-based索引)
|
| 17 |
-
element_end_col = 14 # 元素数据到第15列结束(0-based索引)
|
| 18 |
-
|
| 19 |
-
# 提取数据
|
| 20 |
-
results = []
|
| 21 |
-
for row_idx in range(start_row, len(df)):
|
| 22 |
-
row = df.iloc[row_idx]
|
| 23 |
-
if pd.isna(row[crucible_id_col]): # 跳过空行
|
| 24 |
-
continue
|
| 25 |
-
|
| 26 |
-
# 提取Crucible ID
|
| 27 |
-
crucible_id = str(row[crucible_id_col]).strip()
|
| 28 |
-
|
| 29 |
-
# 提取元素数据
|
| 30 |
-
elements = {
|
| 31 |
-
"Crucible ID": crucible_id,
|
| 32 |
-
"Al": row[element_start_col],
|
| 33 |
-
"Ca": row[element_start_col + 1],
|
| 34 |
-
"Cu": row[element_start_col + 2],
|
| 35 |
-
"Fe": row[element_start_col + 3],
|
| 36 |
-
"K": row[element_start_col + 4],
|
| 37 |
-
"Li": row[element_start_col + 5],
|
| 38 |
-
"Mg": row[element_start_col + 6],
|
| 39 |
-
"Mn": row[element_start_col + 7],
|
| 40 |
-
"Na": row[element_start_col + 8],
|
| 41 |
-
"Ti": row[element_start_col + 9],
|
| 42 |
-
"Zr": row[element_start_col + 10],
|
| 43 |
-
}
|
| 44 |
-
results.append(elements)
|
| 45 |
-
|
| 46 |
-
# 将结果转换为DataFrame
|
| 47 |
-
result_df = pd.DataFrame(results)
|
| 48 |
-
|
| 49 |
-
# 保存到新的Excel文件
|
| 50 |
-
result_df.to_excel(output_file, index=False)
|
| 51 |
-
return f"数据已成功保存到 {output_file}"
|
| 52 |
-
|
| 53 |
-
except Exception as e:
|
| 54 |
-
return f"处理错误: {str(e)}"
|
| 55 |
-
|
| 56 |
-
# 使用示例
|
| 57 |
-
if __name__ == "__main__":
|
| 58 |
-
input_file = "1.xls" # 输入文件路径
|
| 59 |
-
output_file = "output.xlsx" # 输出文件路径
|
| 60 |
-
result = extract_sand_data(input_file, output_file)
|
| 61 |
-
print(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_structure.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
|
| 3 |
-
def analyze_excel_structure(file_path):
|
| 4 |
-
"""分析Excel文件的基本结构"""
|
| 5 |
-
print(f"\n开始分析文件: {file_path}")
|
| 6 |
-
|
| 7 |
-
# 读取Excel文件
|
| 8 |
-
xls = pd.ExcelFile(file_path)
|
| 9 |
-
|
| 10 |
-
# 打印所有sheet名称
|
| 11 |
-
print("\n所有Sheet页:")
|
| 12 |
-
for sheet_name in xls.sheet_names:
|
| 13 |
-
print(f"- {sheet_name}")
|
| 14 |
-
|
| 15 |
-
# 分析每个sheet的结构
|
| 16 |
-
for sheet_name in xls.sheet_names:
|
| 17 |
-
print(f"\n\n分析 Sheet [{sheet_name}]:")
|
| 18 |
-
|
| 19 |
-
# 尝试不同的skiprows值来找到正确的数据结构
|
| 20 |
-
for skip_rows in [0, 6, 12]:
|
| 21 |
-
print(f"\n跳过 {skip_rows} 行后的结构:")
|
| 22 |
-
try:
|
| 23 |
-
df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=skip_rows)
|
| 24 |
-
print("\n列名:")
|
| 25 |
-
print(df.columns.tolist())
|
| 26 |
-
print("\n前3行数据:")
|
| 27 |
-
print(df.head(3))
|
| 28 |
-
|
| 29 |
-
# 打印非空行数
|
| 30 |
-
non_empty_rows = len(df.dropna(how='all'))
|
| 31 |
-
print(f"\n非空行数: {non_empty_rows}")
|
| 32 |
-
|
| 33 |
-
except Exception as e:
|
| 34 |
-
print(f"读取出错: {str(e)}")
|
| 35 |
-
|
| 36 |
-
if __name__ == "__main__":
|
| 37 |
-
INPUT_FILE = "1.xls"
|
| 38 |
-
analyze_excel_structure(INPUT_FILE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|