velmurugan1122's picture
Upload 24 files
1c6a866 verified
import sqlite3
import pandas as pd
# Define file paths
DB_PATH = r"E:\Healthcare_chat_AI\data\veludb.db"
file_path = r"E:\Healthcare_chat_AI\data\healthcare_dataset.csv"
# Load CSV into a Pandas DataFrame
df = pd.read_csv(file_path)
# Print original column names to debug potential issues
print("Original CSV Columns:", df.columns.tolist())
# Define column name mapping
column_mapping = {
"Blood Type": "Blood_Type",
"Medical Condition": "Medical_Condition",
"Date of Admission": "Date_of_Admission",
"Insurance Provider": "Insurance_Provider",
"Billing Amount": "Billing_Amount",
"Room Number": "Room_Number",
"Admission Type": "Admission_Type",
"Discharge Date": "Discharge_Date",
}
# Rename columns only if they exist in the CSV
df.rename(columns=column_mapping, inplace=True)
# Print updated column names to confirm the change
print("Updated Columns:", df.columns.tolist())
# Ensure all necessary columns exist
for col in column_mapping.values():
if col not in df.columns:
print(f"Error: Column '{col}' is missing after renaming! Check CSV headers.")
# Clean the data
df["Age"] = pd.to_numeric(df["Age"], errors="coerce").fillna(0).astype("Int64")
df["Billing_Amount"] = pd.to_numeric(df["Billing_Amount"], errors="coerce").fillna(0.0)
# Convert date columns if they exist
for col in ["Date_of_Admission", "Discharge_Date"]:
if col in df.columns:
df[col] = pd.to_datetime(df[col], errors="coerce").dt.strftime("%Y-%m-%d")
# Connect to SQLite database
conn = sqlite3.connect("veludb.db")
cursor = conn.cursor()
# Create table with correct column names
cursor.execute("""
CREATE TABLE IF NOT EXISTS healthcare_data (
ID INTEGER PRIMARY KEY AUTOINCREMENT,
Name TEXT,
Age INTEGER,
Gender TEXT,
Blood_Type TEXT,
Medical_Condition TEXT,
Date_of_Admission TEXT,
Doctor TEXT,
Hospital TEXT,
Insurance_Provider TEXT,
Billing_Amount REAL,
Admission_Type TEXT,
Discharge_Date TEXT,
Medication TEXT,
Test_Results TEXT
)
""")
# Insert data into the database
df.to_sql("healthcare_data", conn, if_exists="replace", index=False)
# Commit and close connection
conn.commit()
conn.close()
print("Healthcare CSV cleaned and imported successfully!")