File size: 4,070 Bytes
ffa1f50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pandas as pd
import os
from pathlib import Path

class ExcelToCsvConverter:
    """Convert the sheets of an Excel workbook into CSV files.

    The conversion helpers are class/static methods that always take explicit
    paths, so they can be called on the class itself or on an instance::

        ExcelToCsvConverter.convert_excel_to_csv("book.xlsx", "out_dir")

    ``__init__`` only records the given paths on the instance; the conversion
    methods do not read them.
    """

    def __init__(self, excel_path, output_dir=None):
        self.excel_path = excel_path
        self.output_dir = output_dir

    @staticmethod
    def _safe_csv_name(sheet_name, fallback="sheet"):
        """Return a filesystem-friendly ``<stem>.csv`` name for *sheet_name*.

        Only alphanumerics, spaces, ``-`` and ``_`` are kept; trailing
        whitespace is stripped and remaining spaces become underscores.
        *fallback* is used as the stem when nothing usable is left, so the
        result is never just ``.csv``.
        """
        kept = "".join(
            c for c in str(sheet_name) if c.isalnum() or c in (" ", "-", "_")
        )
        stem = kept.rstrip().replace(" ", "_") or fallback
        return f"{stem}.csv"

    @staticmethod
    def _resolve_output_dir(excel_path, output_dir):
        """Return the output directory, creating it if necessary.

        When *output_dir* is None the directory containing *excel_path* is
        used.
        """
        if output_dir is None:
            output_dir = os.path.dirname(excel_path)
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        return output_dir

    @staticmethod
    def _analyze_excel_structure(excel_path):
        """Print and return the sheet names of the workbook.

        Returns:
            list | None: The sheet names, or None if the workbook could not
            be opened (the error is printed).
        """
        try:
            with pd.ExcelFile(excel_path) as workbook:
                sheet_names = list(workbook.sheet_names)
        except Exception as e:  # report-and-continue boundary, like the converter
            print(f"❌ Error analyzing Excel file: {e}")
            return None

        print(f"Found {len(sheet_names)} sheet(s):")
        for i, name in enumerate(sheet_names, 1):
            print(f"   {i}. {name}")
        return sheet_names

    @classmethod
    def convert_excel_to_csv(cls, excel_path, output_dir=None):
        """Convert each sheet of an Excel file to a separate CSV file.

        Args:
            excel_path (str): Path to the Excel file.
            output_dir (str): Output directory for the CSV files. If None,
                the directory of the Excel file is used. It is created when
                missing.

        Returns:
            list[str] | None: Paths of the written CSV files in sheet order,
            or None if anything went wrong (the error is printed, not
            raised).
        """
        try:
            output_dir = cls._resolve_output_dir(excel_path, output_dir)
            converted_files = []
            used_names = set()

            # Open the workbook once and close it deterministically instead
            # of re-opening the file for every sheet.
            with pd.ExcelFile(excel_path) as workbook:
                for i, sheet_name in enumerate(workbook.sheet_names, 1):
                    df = pd.read_excel(workbook, sheet_name=sheet_name)

                    csv_filename = cls._safe_csv_name(
                        sheet_name, fallback=f"sheet_{i}"
                    )
                    # Different sheet names can sanitize to the same file
                    # name; add a numeric suffix so no sheet overwrites
                    # another (compared case-insensitively for file systems
                    # that ignore case).
                    stem = csv_filename[: -len(".csv")]
                    suffix = 2
                    while csv_filename.casefold() in used_names:
                        csv_filename = f"{stem}_{suffix}.csv"
                        suffix += 1
                    used_names.add(csv_filename.casefold())

                    csv_path = os.path.join(output_dir, csv_filename)
                    df.to_csv(csv_path, index=False, encoding='utf-8')
                    converted_files.append(csv_path)

                    print(f"✅ {i}. '{sheet_name}' → {csv_filename}")
                    print(f"   - Saved {len(df)} rows, {len(df.columns)} columns")

            print(f"\n🎉 Successfully converted {len(converted_files)} sheets to CSV files!")
            return converted_files

        except Exception as e:
            # Callers rely on a None result rather than an exception.
            print(f"❌ Error converting Excel to CSV: {e}")
            return None

    @classmethod
    def convert_specific_sheet_to_csv(cls, excel_path, sheet_name, output_dir=None):
        """Convert a specific sheet of an Excel file to a CSV file.

        Args:
            excel_path (str): Path to the Excel file.
            sheet_name: Sheet to convert, passed through to
                ``pandas.read_excel``.
            output_dir (str): Output directory for the CSV file. If None,
                the directory of the Excel file is used. It is created when
                missing.

        Returns:
            str: Path of the written CSV file.

        Unlike ``convert_excel_to_csv``, errors raised while reading or
        writing are not caught here and propagate to the caller.
        """
        output_dir = cls._resolve_output_dir(excel_path, output_dir)

        df = pd.read_excel(excel_path, sheet_name=sheet_name)
        csv_filename = cls._safe_csv_name(sheet_name)
        csv_path = os.path.join(output_dir, csv_filename)
        df.to_csv(csv_path, index=False, encoding='utf-8')
        print(f"✅ {sheet_name} → {csv_filename}")

        return csv_path

    @classmethod
    def main(cls):
        """Main function to analyze and convert the project Excel file."""

        # Define paths
        excel_path = "data/real_data_excel/AI Project document.xlsx"
        output_dir = "data/real_data_excel/converted_csv"

        # Check if Excel file exists
        if not os.path.exists(excel_path):
            print(f"❌ Excel file not found: {excel_path}")
            return

        print("=" * 60)
        print("📊 EXCEL TO CSV CONVERTER")
        print("=" * 60)

        # Step 1: Analyze Excel structure
        sheet_info = cls._analyze_excel_structure(excel_path)

        if sheet_info is None:
            return

        # Step 2: Convert to CSV
        converted_files = cls.convert_excel_to_csv(excel_path, output_dir)

        if converted_files:
            print("\n📂 Converted files:")
            for file_path in converted_files:
                print(f"   - {file_path}")


if __name__ == "__main__":
    ExcelToCsvConverter.main()