| | import openpyxl
|
| | import pandas as pd
|
| | from pathlib import Path
|
| |
|
| | def extract_excel_data(file_path):
|
| | """
|
| | Extract all data from an Excel file (.xlsx)
|
| |
|
| | Args:
|
| | file_path (str): Path to the .xlsx file
|
| |
|
| | Returns:
|
| | dict: Dictionary containing data from all sheets
|
| | """
|
| | try:
|
| |
|
| | workbook = openpyxl.load_workbook(file_path)
|
| |
|
| |
|
| | all_data = {}
|
| |
|
| | print(f"Processing file: {file_path}")
|
| | print(f"Number of sheets: {len(workbook.sheetnames)}\n")
|
| |
|
| |
|
| | for sheet_name in workbook.sheetnames:
|
| | sheet = workbook[sheet_name]
|
| |
|
| | print(f"--- Sheet: {sheet_name} ---")
|
| | print(f"Dimensions: {sheet.dimensions}")
|
| |
|
| |
|
| | sheet_data = []
|
| | for row in sheet.iter_rows(values_only=True):
|
| | sheet_data.append(row)
|
| |
|
| | all_data[sheet_name] = sheet_data
|
| |
|
| |
|
| | print(f"First 5 rows:")
|
| | for i, row in enumerate(sheet_data[:5]):
|
| | print(f" Row {i+1}: {row}")
|
| | print()
|
| |
|
| | return all_data
|
| |
|
| | except FileNotFoundError:
|
| | print(f"Error: File '{file_path}' not found.")
|
| | return None
|
| | except Exception as e:
|
| | print(f"Error: {e}")
|
| | return None
|
| |
|
| |
|
| | def extract_excel_with_pandas(file_path):
|
| | """
|
| | Alternative method using pandas to extract Excel data
|
| |
|
| | Args:
|
| | file_path (str): Path to the .xlsx file
|
| |
|
| | Returns:
|
| | dict: Dictionary of DataFrames, one per sheet
|
| | """
|
| | try:
|
| |
|
| | all_sheets = pd.read_excel(file_path, sheet_name=None)
|
| |
|
| | print(f"Processing file: {file_path}")
|
| | print(f"Number of sheets: {len(all_sheets)}\n")
|
| |
|
| |
|
| | for sheet_name, df in all_sheets.items():
|
| | print(f"--- Sheet: {sheet_name} ---")
|
| | print(f"Shape: {df.shape} (rows, columns)")
|
| | print(f"Columns: {list(df.columns)}")
|
| | print("\nFirst 5 rows:")
|
| | print(df.head())
|
| | print("\n")
|
| |
|
| | return all_sheets
|
| |
|
| | except FileNotFoundError:
|
| | print(f"Error: File '{file_path}' not found.")
|
| | return None
|
| | except Exception as e:
|
| | print(f"Error: {e}")
|
| | return None
|
| |
|
| |
|
| | if __name__ == "__main__":
|
| |
|
| | file_path = "ReportTester-263254895.xlsx"
|
| |
|
| | print("=" * 60)
|
| | print("Method 1: Using openpyxl")
|
| | print("=" * 60)
|
| | data = extract_excel_data(file_path)
|
| |
|
| | print("\n" + "=" * 60)
|
| | print("Method 2: Using pandas (recommended)")
|
| | print("=" * 60)
|
| | df_data = extract_excel_with_pandas(file_path)
|
| |
|
| |
|
| | if df_data:
|
| |
|
| | for sheet_name, df in df_data.items():
|
| | output_file = f"{sheet_name}.csv"
|
| | df.to_csv(output_file, index=False)
|
| | print(f"Exported '{sheet_name}' to {output_file}") |