""" SQLite database layer for storing and querying polymer datasheets. """ from __future__ import annotations import logging from typing import Optional import pandas as pd from sqlalchemy import ( Column, String, Text, create_engine, or_, ) from sqlalchemy.orm import Session, declarative_base, sessionmaker import config from models import DatasheetRecord logger = logging.getLogger(__name__) Base = declarative_base() # ── ORM Model ──────────────────────────────────────────────────────────────── class DatasheetRow(Base): """SQLAlchemy model mirroring DatasheetRecord.""" __tablename__ = "datasheets" id = Column(String, primary_key=True) created_at = Column(String, default="") # General material_name = Column(Text, default="") trade_name = Column(Text, default="") manufacturer = Column(Text, default="", index=True) polymer_family = Column(Text, default="", index=True) grade = Column(Text, default="") description = Column(Text, default="") processing_method = Column(Text, default="") features = Column(Text, default="") applications = Column(Text, default="") source_url = Column(Text, default="") # Mechanical tensile_strength_mpa = Column(Text, default="") tensile_modulus_mpa = Column(Text, default="") elongation_at_break_pct = Column(Text, default="") flexural_strength_mpa = Column(Text, default="") flexural_modulus_mpa = Column(Text, default="") impact_strength_charpy_kj_m2 = Column(Text, default="") impact_strength_izod_j_m = Column(Text, default="") hardness_shore_d = Column(Text, default="") hardness_rockwell = Column(Text, default="") compressive_strength_mpa = Column(Text, default="") # Thermal melting_temperature_c = Column(Text, default="") glass_transition_temperature_c = Column(Text, default="") heat_deflection_temperature_c = Column(Text, default="") vicat_softening_temperature_c = Column(Text, default="") continuous_service_temperature_c = Column(Text, default="") thermal_conductivity_w_mk = Column(Text, default="") coefficient_of_thermal_expansion_um_mk = Column(Text, default="") flammability_rating = Column(Text, default="") # Physical density_g_cm3 = Column(Text, default="") melt_flow_index_g_10min = Column(Text, default="") water_absorption_pct = Column(Text, default="") moisture_absorption_pct = Column(Text, default="") specific_gravity = Column(Text, default="") transparency = Column(Text, default="") color = Column(Text, default="") # Electrical dielectric_strength_kv_mm = Column(Text, default="") dielectric_constant = Column(Text, default="") volume_resistivity_ohm_cm = Column(Text, default="") surface_resistivity_ohm = Column(Text, default="") dissipation_factor = Column(Text, default="") # Chemical Resistance acid_resistance = Column(Text, default="") alkali_resistance = Column(Text, default="") solvent_resistance = Column(Text, default="") uv_resistance = Column(Text, default="") weatherability = Column(Text, default="") # Regulatory fda_approved = Column(Text, default="") rohs_compliant = Column(Text, default="") reach_compliant = Column(Text, default="") ul94_rating = Column(Text, default="") # ── Database Manager ───────────────────────────────────────────────────────── class DatasheetDB: """Manages all database operations for the polymer datasheet store.""" def __init__(self, db_path: str = config.DB_PATH): self.engine = create_engine(f"sqlite:///{db_path}", echo=False) Base.metadata.create_all(self.engine) self.SessionLocal = sessionmaker(bind=self.engine) # ── Write ───────────────────────────────────────────────────────────── def upsert(self, record: DatasheetRecord) -> str: """Insert or update a datasheet record. Returns the record ID.""" data = record.to_flat_dict() with self.SessionLocal() as session: existing = session.get(DatasheetRow, data["id"]) if existing: for key, value in data.items(): setattr(existing, key, value) else: row = DatasheetRow(**data) session.add(row) session.commit() logger.info("Upserted record %s (%s)", data["id"], data.get("trade_name")) return data["id"] # ── Read ────────────────────────────────────────────────────────────── def search( self, query: str = "", manufacturer: str = "", polymer_family: str = "", limit: int = 50, ) -> pd.DataFrame: """ Search the database with optional filters. Returns a Pandas DataFrame. """ with self.SessionLocal() as session: q = session.query(DatasheetRow) if manufacturer: q = q.filter( DatasheetRow.manufacturer.ilike(f"%{manufacturer}%") ) if polymer_family: q = q.filter( DatasheetRow.polymer_family.ilike(f"%{polymer_family}%") ) if query: pattern = f"%{query}%" q = q.filter( or_( DatasheetRow.material_name.ilike(pattern), DatasheetRow.trade_name.ilike(pattern), DatasheetRow.manufacturer.ilike(pattern), DatasheetRow.polymer_family.ilike(pattern), DatasheetRow.grade.ilike(pattern), DatasheetRow.description.ilike(pattern), DatasheetRow.applications.ilike(pattern), DatasheetRow.features.ilike(pattern), ) ) rows = q.limit(limit).all() if not rows: return pd.DataFrame() records = [] for row in rows: records.append( {c.name: getattr(row, c.name) for c in DatasheetRow.__table__.columns} ) return pd.DataFrame(records) def get_by_id(self, record_id: str) -> Optional[DatasheetRecord]: """Retrieve a single record by ID.""" with self.SessionLocal() as session: row = session.get(DatasheetRow, record_id) if row is None: return None data = {c.name: getattr(row, c.name) for c in DatasheetRow.__table__.columns} return DatasheetRecord(**data) def get_all_dataframe(self) -> pd.DataFrame: """Return the entire database as a DataFrame.""" return self.search(limit=10_000) def count(self) -> int: """Return total number of records.""" with self.SessionLocal() as session: return session.query(DatasheetRow).count() def delete(self, record_id: str) -> bool: """Delete a record by ID. Returns True if deleted.""" with self.SessionLocal() as session: row = session.get(DatasheetRow, record_id) if row: session.delete(row) session.commit() return True return False def get_summary_dataframe(self) -> pd.DataFrame: """Return a summary view with key columns only.""" df = self.get_all_dataframe() if df.empty: return df summary_cols = [ "id", "material_name", "trade_name", "manufacturer", "polymer_family", "grade", "density_g_cm3", "tensile_strength_mpa", "melting_temperature_c", "heat_deflection_temperature_c", "applications", "created_at", ] available = [c for c in summary_cols if c in df.columns] return df[available]