File size: 4,070 Bytes
cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 8c95080 cd87ae5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import pandas as pd
import os
from pathlib import Path
class ExcelToCsvConverter:
    """
    Hold the settings for converting one Excel file to CSV files.

    Attributes:
        excel_path: Path to the Excel file, stored exactly as given.
        output_dir: Output directory for CSV files, or None when the caller
            did not supply one.
    """

    def __init__(self, excel_path, output_dir=None):
        """Remember the Excel file path and the optional output directory."""
        self.excel_path, self.output_dir = excel_path, output_dir
def convert_excel_to_csv(excel_path, output_dir=None):
    """
    Convert each sheet of an Excel file to a separate CSV file.

    Each sheet is written as ``<sanitized sheet name>.csv`` (UTF-8, without
    the index column). Sanitizing keeps only letters, digits, spaces, '-' and
    '_', strips trailing whitespace and turns spaces into underscores. A sheet
    whose name sanitizes to an empty string is saved as
    ``sheet_<position>.csv``, and a name that would collide with a file
    already written by this call gets a numeric suffix, so one sheet never
    silently overwrites another.

    Args:
        excel_path (str): Path to the Excel file
        output_dir (str): Output directory for CSV files. If None, uses same
            directory as Excel file

    Returns:
        list[str] | None: Paths of the CSV files written, in sheet order, or
        None if any step failed (the error is printed, not raised).
    """
    # NOTE(review): the 'β' / 'π' glyphs in the messages below look like
    # mis-decoded emoji; confirm the file's encoding before changing them.
    try:
        # Set up output directory
        if output_dir is None:
            output_dir = os.path.dirname(excel_path)
        # Create output directory if it doesn't exist
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        converted_files = []
        used_stems = set()

        # Open the workbook once and make sure the handle is closed again,
        # instead of re-reading the file from disk for every sheet.
        with pd.ExcelFile(excel_path) as excel_file:
            for i, sheet_name in enumerate(excel_file.sheet_names, 1):
                df = pd.read_excel(excel_file, sheet_name=sheet_name)

                # Create a safe filename for the CSV
                stem = "".join(
                    c for c in sheet_name
                    if c.isalnum() or c in (' ', '-', '_')
                ).rstrip().replace(' ', '_')
                if not stem:
                    # Nothing usable survived sanitizing; avoid a bare ".csv"
                    stem = f"sheet_{i}"

                # Different sheet names can sanitize to the same stem
                # (e.g. "A&B" and "AB"); compare case-insensitively so the
                # result is also safe on case-insensitive filesystems.
                unique_stem, suffix = stem, 2
                while unique_stem.casefold() in used_stems:
                    unique_stem = f"{stem}_{suffix}"
                    suffix += 1
                used_stems.add(unique_stem.casefold())

                csv_filename = f"{unique_stem}.csv"
                csv_path = os.path.join(output_dir, csv_filename)

                # Save as CSV
                df.to_csv(csv_path, index=False, encoding='utf-8')
                converted_files.append(csv_path)
                print(f"β {i}. '{sheet_name}' β {csv_filename}")
                print(f" - Saved {len(df)} rows, {len(df.columns)} columns")

        print(f"\nπ Successfully converted {len(converted_files)} sheets to CSV files!")
        return converted_files
    except Exception as e:
        # Best-effort boundary: callers test the result for truthiness
        # rather than catching exceptions.
        print(f"β Error converting Excel to CSV: {e}")
        return None
def convert_specific_sheet_to_csv(excel_path, sheet_name, output_dir=None):
    """
    Convert a specific sheet of an Excel file to a CSV file.

    The CSV is named after the sheet: only letters, digits, spaces, '-' and
    '_' are kept, trailing whitespace is stripped and spaces become
    underscores. If nothing is left after sanitizing, the file is named
    ``sheet.csv``. The file is written as UTF-8 without the index column.

    Args:
        excel_path (str): Path to the Excel file
        sheet_name (str): Name of the sheet to convert
        output_dir (str): Output directory for the CSV file. If None, uses
            the directory of the Excel file. Created if it does not exist.

    Returns:
        str: Path of the CSV file that was written.

    Raises:
        Whatever ``pd.read_excel`` / ``DataFrame.to_csv`` raise (for example
        when the file or the sheet does not exist); errors are not caught.
    """
    if output_dir is None:
        output_dir = os.path.dirname(excel_path)

    # Read first so a missing file or sheet fails before anything is created
    df = pd.read_excel(excel_path, sheet_name=sheet_name)

    safe_filename = "".join(
        c for c in sheet_name if c.isalnum() or c in (' ', '-', '_')
    ).rstrip().replace(' ', '_')
    if not safe_filename:
        # Avoid writing a hidden, nameless ".csv"
        safe_filename = "sheet"
    csv_filename = f"{safe_filename}.csv"
    csv_path = os.path.join(output_dir, csv_filename)

    # Create the output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    df.to_csv(csv_path, index=False, encoding='utf-8')

    # NOTE(review): the 'β' glyphs look like mis-decoded emoji; confirm the
    # file's encoding before changing them.
    print(f"β {sheet_name} β {csv_filename}")
    return csv_path
def main():
    """Run the conversion for the hard-coded workbook and list the results.

    Returns early (None) when the workbook is missing or when the structure
    analysis step returns None.
    """
    # Input workbook and destination folder for the generated CSV files
    excel_path = "data/real_data_excel/AI Project document.xlsx"
    output_dir = "data/real_data_excel/converted_csv"

    # Nothing to do without the input workbook
    if not os.path.exists(excel_path):
        print(f"β Excel file not found: {excel_path}")
        return

    rule = "=" * 60
    print(rule)
    print("π EXCEL TO CSV CONVERTER")
    print(rule)

    # Step 1: analyze the workbook; a None result aborts the run.
    # NOTE(review): analyze_excel_structure is not defined in the visible
    # part of this file - confirm it exists and what it returns.
    if analyze_excel_structure(excel_path) is None:
        return

    # Step 2: convert every sheet, then report what was written
    csv_paths = convert_excel_to_csv(excel_path, output_dir)
    if not csv_paths:
        return
    print("\nπ Converted files:")
    for file_path in csv_paths:
        print(f" - {file_path}")
# Run the converter only when executed as a script, not when imported
if __name__ == "__main__":
    main()