File size: 2,307 Bytes
059884f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import sqlite3
import os

# 1. Define the CSV files you need to load
csv_files = [
    "territory_dim.csv",
    "rep_dim.csv",
    "fact_rx.csv",
    "fact_payor_mix.csv",
    "date_dim.csv",
    "fact_ln_metrics.csv",
    "fact_rep_activity.csv",
    "account_dim.csv",
    "hcp_dim.csv"
]

# 2. Define the database file name
db_file = "pharma_data.db"

def load_csv_to_sqlite():
    """
    Loads a list of CSV files into a single SQLite database, 
    creating a table for each file.
    """

    csv_files = [
    "territory_dim.csv",
    "rep_dim.csv",
    "fact_rx.csv",
    "fact_payor_mix.csv",
    "date_dim.csv",
    "fact_ln_metrics.csv",
    "fact_rep_activity.csv",
    "account_dim.csv",
    "hcp_dim.csv"]

    # 2. Define the database file name
    db_file = "pharma_data.db"

    print(f"Connecting to database: {db_file}")
    
    # Connect to the SQLite database (it will be created if it doesn't exist)
    conn = sqlite3.connect(db_file)
    
    for file_name in csv_files:
        # Check if the file exists before attempting to load
        full_path = os.path.join("data", file_name)
        if not os.path.exists(full_path):
            print(f"⚠️ Warning: File not found: {file_name}. Skipping.")
            continue
            
        # Create a clean table name from the file name
        table_name = file_name.replace(".csv", "")
        
        try:
            print(f"Loading '{file_name}' into table '{table_name}'...")
            
            # Read the CSV into a pandas DataFrame
            # Assuming the first row contains headers.
            df = pd.read_csv(full_path)
            
            # Write the DataFrame to the SQL database
            # 'replace' will overwrite the table if it already exists
            df.to_sql(table_name, conn, if_exists='replace', index=False)
            
            print(f"   ✅ Successfully loaded {len(df)} rows.")

        except pd.errors.EmptyDataError:
            print(f"   ❌ Error: {file_name} is empty. Skipping.")
        except Exception as e:
            print(f"   ❌ An error occurred while processing {file_name}: {e}")

    # Close the connection
    conn.close()
    print("\nAll files processed. Database connection closed.")
    print(f"Database '{db_file}' is ready!")