DanielRegaladoCardoso's picture
Initial deploy: Apple/Claude design, DuckDB, 3 trained LoRAs
a067ada verified
"""
Load various data formats and convert to SQLite databases.
Supports CSV, Excel, JSON file formats and converts them
into in-memory SQLite databases for SQL querying.
"""
import sqlite3
import logging
from typing import Dict, Any, Optional
from pathlib import Path
import json
logger = logging.getLogger(__name__)
class DataLoader:
"""Load data from various formats into SQLite database."""
def __init__(self) -> None:
"""Initialize data loader."""
pass
def load_csv(
self,
file_path: str | Path,
table_name: Optional[str] = None,
in_memory: bool = True,
) -> str:
"""
Load CSV file into SQLite database.
Args:
file_path: Path to CSV file
table_name: Name for the table (default: filename without extension)
in_memory: Whether to use in-memory database
Returns:
Path to database file (or ':memory:' for in-memory)
"""
try:
import pandas as pd
file_path = Path(file_path)
if not file_path.exists():
raise FileNotFoundError(f"CSV file not found: {file_path}")
logger.info(f"Loading CSV: {file_path}")
df = pd.read_csv(file_path)
table_name = table_name or file_path.stem
db_path = self._create_database(df, table_name, in_memory)
logger.info(f"CSV loaded to {db_path}, table: {table_name}")
return db_path
except ImportError:
logger.error("pandas not installed. Install with: pip install pandas")
raise
except Exception as e:
logger.error(f"Error loading CSV: {e}")
raise
def load_excel(
self,
file_path: str | Path,
sheet_name: str = 0,
table_name: Optional[str] = None,
in_memory: bool = True,
) -> str:
"""
Load Excel file into SQLite database.
Args:
file_path: Path to Excel file
sheet_name: Sheet to load
table_name: Name for the table
in_memory: Whether to use in-memory database
Returns:
Path to database file
"""
try:
import pandas as pd
file_path = Path(file_path)
if not file_path.exists():
raise FileNotFoundError(f"Excel file not found: {file_path}")
logger.info(f"Loading Excel: {file_path}, sheet: {sheet_name}")
df = pd.read_excel(file_path, sheet_name=sheet_name)
table_name = table_name or f"table_{sheet_name}"
db_path = self._create_database(df, table_name, in_memory)
logger.info(f"Excel loaded to {db_path}, table: {table_name}")
return db_path
except ImportError:
logger.error("openpyxl/pandas not installed")
raise
except Exception as e:
logger.error(f"Error loading Excel: {e}")
raise
def load_json(
self,
file_path: str | Path,
table_name: Optional[str] = None,
in_memory: bool = True,
) -> str:
"""
Load JSON file into SQLite database.
Args:
file_path: Path to JSON file
table_name: Name for the table
in_memory: Whether to use in-memory database
Returns:
Path to database file
"""
try:
import pandas as pd
file_path = Path(file_path)
if not file_path.exists():
raise FileNotFoundError(f"JSON file not found: {file_path}")
logger.info(f"Loading JSON: {file_path}")
with open(file_path) as f:
data = json.load(f)
# Handle both list of records and single record
if isinstance(data, dict):
data = [data]
df = pd.DataFrame(data)
table_name = table_name or file_path.stem
db_path = self._create_database(df, table_name, in_memory)
logger.info(f"JSON loaded to {db_path}, table: {table_name}")
return db_path
except ImportError:
logger.error("pandas not installed")
raise
except Exception as e:
logger.error(f"Error loading JSON: {e}")
raise
def _create_database(
self,
df,
table_name: str,
in_memory: bool = True,
) -> str:
"""
Create SQLite database from DataFrame.
Args:
df: Pandas DataFrame
table_name: Name for the table
in_memory: Whether to use in-memory database
Returns:
Path to database
"""
db_path = ":memory:" if in_memory else f"{table_name}.db"
try:
conn = sqlite3.connect(db_path)
df.to_sql(table_name, conn, if_exists="replace", index=False)
conn.commit()
conn.close()
logger.info(f"Created database with table: {table_name}")
return db_path
except Exception as e:
logger.error(f"Error creating database: {e}")
raise
def load_dict_list(
self,
data: list[Dict[str, Any]],
table_name: str = "data",
in_memory: bool = True,
) -> str:
"""
Load list of dictionaries into SQLite database.
Args:
data: List of dictionaries
table_name: Name for the table
in_memory: Whether to use in-memory database
Returns:
Path to database
"""
try:
import pandas as pd
if not data:
raise ValueError("Data list is empty")
logger.info(f"Loading {len(data)} records into table: {table_name}")
df = pd.DataFrame(data)
db_path = self._create_database(df, table_name, in_memory)
return db_path
except Exception as e:
logger.error(f"Error loading data: {e}")
raise