# smart_table_parsing / get_excel_structure.py
# Scoooooott's picture
# Fix data access logic in profile functions
# 89b3827
import os
import requests
import tempfile
from openpyxl import load_workbook
import logging
logger = logging.getLogger(__name__)


def main(file_info: dict) -> dict:
    """Return the list of sheet names contained in a remote Excel file.

    The file is downloaded to a temporary ``.xlsx`` file, opened with
    openpyxl in read-only mode, and the temporary file is removed before
    returning. This function never raises for ordinary errors: every
    ``Exception`` is logged and converted into a "failed" result dict.

    Args:
        file_info: Mapping with the download URL under ``"fileUrl"`` and an
            optional display key under ``"fileKey"`` (defaults to ``"file"``).

    Returns:
        On success::

            {"file_key": ..., "sheets": [...], "count": N, "status": "success"}

        On failure::

            {"status": "failed", "error_type": <exception class name>,
             "message": <str(exception)>}
    """
    src_tmp_name = None
    try:
        file_url = file_info.get("fileUrl")
        file_key = file_info.get("fileKey", "file")

        # Fail fast with a clear message instead of letting the HTTP client
        # complain about an invalid URL such as ``None``.
        if not file_url:
            raise ValueError("file_info must contain a non-empty 'fileUrl'")

        logger.info(f"正在获取文件 Sheet 列表: {file_key}")

        # 1. Download the file.
        resp = requests.get(file_url, timeout=30)
        resp.raise_for_status()

        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f_in:
            # Record the path BEFORE writing so the ``finally`` clause can
            # still remove the file if the write itself fails.
            src_tmp_name = f_in.name
            f_in.write(resp.content)

        # 2. Read only the sheet names. read_only mode is the lightest way
        #    to open the workbook; close it in ``finally`` so the underlying
        #    file handle is released even on error (needed for the unlink
        #    below to succeed on Windows).
        wb = load_workbook(src_tmp_name, read_only=True, keep_links=False)
        try:
            sheet_names = wb.sheetnames
        finally:
            wb.close()

        logger.info(f"成功识别到 {len(sheet_names)} 个 Sheet: {sheet_names}")
        return {
            "file_key": file_key,
            "sheets": sheet_names,
            "count": len(sheet_names),
            "status": "success",
        }
    except Exception as e:
        # Top-level boundary: report the failure to the caller as data, but
        # keep the traceback in the log for debugging.
        logger.exception(f"获取 Sheet 列表异常: {str(e)}")
        return {
            "status": "failed",
            "error_type": type(e).__name__,
            "message": str(e),
        }
    finally:
        # Best-effort removal of the temporary file.
        if src_tmp_name:
            try:
                os.unlink(src_tmp_name)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except OSError as cleanup_err:
                # Do not mask the real result, but do not hide the leak either.
                logger.warning(f"清理临时文件失败: {src_tmp_name}: {cleanup_err}")