Spaces:
Sleeping
Sleeping
| import asyncio | |
| from datetime import time, timedelta, datetime | |
| import pandas as pd | |
| import numpy as np | |
| from sqlalchemy import select | |
| from src.configs import DatabaseConfig | |
| from src.entities.incident import IncidentModel | |
| from src.entities.apparatus import ApparatusModel | |
| from src.entities.personnel import PersonnelModel | |
| from src.entities.auv_incidentbase import IncidentBaseModel | |
| from src.entities.auv_incidentapparatus import IncidentApparatusModel | |
| from src.entities.auv_incidentpersonnel import IncidentPersonnelModel | |
| EXCEL_FILE = "ProjectTablesData.xlsx" | |
| EXCEL_SHEET_MODEL_MAP = { | |
| 0: IncidentModel, | |
| 1: ApparatusModel, | |
| 2: PersonnelModel, | |
| 3: IncidentBaseModel, | |
| 4: IncidentApparatusModel, | |
| 5: IncidentPersonnelModel | |
| } | |
| def clean_value(value): | |
| # Handle NaN | |
| try: | |
| if pd.isna(value): | |
| return None | |
| except (TypeError, ValueError): | |
| pass | |
| # Handle pandas Timestamp → return datetime object (NOT string) | |
| if isinstance(value, pd.Timestamp): | |
| if value == pd.Timestamp("1900-01-01"): # Excel empty date default | |
| return None | |
| return value.to_pydatetime() # ✅ Return actual datetime, not string | |
| # Convert Python time -> string | |
| if isinstance(value, time): | |
| return value.strftime("%H:%M:%S") | |
| # Convert timedelta -> string | |
| if isinstance(value, timedelta): | |
| total_seconds = int(value.total_seconds()) | |
| h = total_seconds // 3600 | |
| m = (total_seconds % 3600) // 60 | |
| s = total_seconds % 60 | |
| return f"{h:02}:{m:02}:{s:02}" | |
| # Convert numpy types | |
| if isinstance(value, np.generic): | |
| return value.item() | |
| # Strip strings | |
| if isinstance(value, str): | |
| value = value.strip() | |
| return value if value else None | |
| return value | |
| def get_model_columns(model): | |
| """Extract SQLAlchemy column names""" | |
| return {column.name for column in model.__table__.columns} | |
| def get_primary_keys(model): | |
| """Get primary key column names""" | |
| return [column.name for column in model.__table__.primary_key.columns] | |
| def row_to_model(model, row): | |
| """Convert pandas row → SQLAlchemy model""" | |
| model_columns = get_model_columns(model) | |
| data = {} | |
| for column in model_columns: | |
| if column in row: | |
| data[column] = clean_value(row[column]) | |
| return model(**data) | |
| async def get_existing_primary_keys(session, model, pk_column): | |
| """ | |
| Fetch existing primary keys from DB | |
| """ | |
| result = await session.execute( | |
| select(getattr(model, pk_column)) | |
| ) | |
| return {row[0] for row in result.fetchall()} | |
| async def ingest_excel(): | |
| print("Loading Excel file...") | |
| excel_data = pd.read_excel(EXCEL_FILE, sheet_name=None) | |
| async with DatabaseConfig.async_session() as session: | |
| total_inserted = 0 | |
| for sheet_index, model in EXCEL_SHEET_MODEL_MAP.items(): | |
| sheet_name = list(excel_data.keys())[sheet_index] | |
| print(f"\nProcessing Sheet: {sheet_name}") | |
| print(f"Mapped Model: {model.__name__}") | |
| df = excel_data[sheet_name] | |
| pk_columns = get_primary_keys(model) | |
| existing_keys = set() | |
| seen_keys = set() | |
| if pk_columns: | |
| pk_column = pk_columns[0] | |
| existing_keys = await get_existing_primary_keys(session, model, pk_column) | |
| objects = [] | |
| skipped = 0 | |
| for _, row in df.iterrows(): | |
| obj = row_to_model(model, row) | |
| if pk_columns: | |
| pk_value = getattr(obj, pk_columns[0], None) | |
| if pk_value in existing_keys or pk_value in seen_keys: | |
| skipped += 1 | |
| continue | |
| seen_keys.add(pk_value) | |
| objects.append(obj) | |
| session.add_all(objects) | |
| inserted = len(objects) | |
| print(f"Inserted: {inserted}") | |
| print(f"Skipped duplicates: {skipped}") | |
| total_inserted += inserted | |
| await session.commit() | |
| print("\nTotal rows inserted:", total_inserted) | |
| if __name__ == "__main__": | |
| asyncio.run(ingest_excel()) |