ravimohan19's picture
Upload database.py with huggingface_hub
23e468b verified
"""
SQLite database layer for storing and querying polymer datasheets.
"""
from __future__ import annotations
import logging
from typing import Optional
import pandas as pd
from sqlalchemy import (
Column,
String,
Text,
create_engine,
or_,
)
from sqlalchemy.orm import Session, declarative_base, sessionmaker
import config
from models import DatasheetRecord
logger = logging.getLogger(__name__)
Base = declarative_base()
# ── ORM Model ────────────────────────────────────────────────────────────────
class DatasheetRow(Base):
"""SQLAlchemy model mirroring DatasheetRecord."""
__tablename__ = "datasheets"
id = Column(String, primary_key=True)
created_at = Column(String, default="")
# General
material_name = Column(Text, default="")
trade_name = Column(Text, default="")
manufacturer = Column(Text, default="", index=True)
polymer_family = Column(Text, default="", index=True)
grade = Column(Text, default="")
description = Column(Text, default="")
processing_method = Column(Text, default="")
features = Column(Text, default="")
applications = Column(Text, default="")
source_url = Column(Text, default="")
# Mechanical
tensile_strength_mpa = Column(Text, default="")
tensile_modulus_mpa = Column(Text, default="")
elongation_at_break_pct = Column(Text, default="")
flexural_strength_mpa = Column(Text, default="")
flexural_modulus_mpa = Column(Text, default="")
impact_strength_charpy_kj_m2 = Column(Text, default="")
impact_strength_izod_j_m = Column(Text, default="")
hardness_shore_d = Column(Text, default="")
hardness_rockwell = Column(Text, default="")
compressive_strength_mpa = Column(Text, default="")
# Thermal
melting_temperature_c = Column(Text, default="")
glass_transition_temperature_c = Column(Text, default="")
heat_deflection_temperature_c = Column(Text, default="")
vicat_softening_temperature_c = Column(Text, default="")
continuous_service_temperature_c = Column(Text, default="")
thermal_conductivity_w_mk = Column(Text, default="")
coefficient_of_thermal_expansion_um_mk = Column(Text, default="")
flammability_rating = Column(Text, default="")
# Physical
density_g_cm3 = Column(Text, default="")
melt_flow_index_g_10min = Column(Text, default="")
water_absorption_pct = Column(Text, default="")
moisture_absorption_pct = Column(Text, default="")
specific_gravity = Column(Text, default="")
transparency = Column(Text, default="")
color = Column(Text, default="")
# Electrical
dielectric_strength_kv_mm = Column(Text, default="")
dielectric_constant = Column(Text, default="")
volume_resistivity_ohm_cm = Column(Text, default="")
surface_resistivity_ohm = Column(Text, default="")
dissipation_factor = Column(Text, default="")
# Chemical Resistance
acid_resistance = Column(Text, default="")
alkali_resistance = Column(Text, default="")
solvent_resistance = Column(Text, default="")
uv_resistance = Column(Text, default="")
weatherability = Column(Text, default="")
# Regulatory
fda_approved = Column(Text, default="")
rohs_compliant = Column(Text, default="")
reach_compliant = Column(Text, default="")
ul94_rating = Column(Text, default="")
# ── Database Manager ─────────────────────────────────────────────────────────
class DatasheetDB:
"""Manages all database operations for the polymer datasheet store."""
def __init__(self, db_path: str = config.DB_PATH):
self.engine = create_engine(f"sqlite:///{db_path}", echo=False)
Base.metadata.create_all(self.engine)
self.SessionLocal = sessionmaker(bind=self.engine)
# ── Write ─────────────────────────────────────────────────────────────
def upsert(self, record: DatasheetRecord) -> str:
"""Insert or update a datasheet record. Returns the record ID."""
data = record.to_flat_dict()
with self.SessionLocal() as session:
existing = session.get(DatasheetRow, data["id"])
if existing:
for key, value in data.items():
setattr(existing, key, value)
else:
row = DatasheetRow(**data)
session.add(row)
session.commit()
logger.info("Upserted record %s (%s)", data["id"], data.get("trade_name"))
return data["id"]
# ── Read ──────────────────────────────────────────────────────────────
def search(
self,
query: str = "",
manufacturer: str = "",
polymer_family: str = "",
limit: int = 50,
) -> pd.DataFrame:
"""
Search the database with optional filters.
Returns a Pandas DataFrame.
"""
with self.SessionLocal() as session:
q = session.query(DatasheetRow)
if manufacturer:
q = q.filter(
DatasheetRow.manufacturer.ilike(f"%{manufacturer}%")
)
if polymer_family:
q = q.filter(
DatasheetRow.polymer_family.ilike(f"%{polymer_family}%")
)
if query:
pattern = f"%{query}%"
q = q.filter(
or_(
DatasheetRow.material_name.ilike(pattern),
DatasheetRow.trade_name.ilike(pattern),
DatasheetRow.manufacturer.ilike(pattern),
DatasheetRow.polymer_family.ilike(pattern),
DatasheetRow.grade.ilike(pattern),
DatasheetRow.description.ilike(pattern),
DatasheetRow.applications.ilike(pattern),
DatasheetRow.features.ilike(pattern),
)
)
rows = q.limit(limit).all()
if not rows:
return pd.DataFrame()
records = []
for row in rows:
records.append(
{c.name: getattr(row, c.name) for c in DatasheetRow.__table__.columns}
)
return pd.DataFrame(records)
def get_by_id(self, record_id: str) -> Optional[DatasheetRecord]:
"""Retrieve a single record by ID."""
with self.SessionLocal() as session:
row = session.get(DatasheetRow, record_id)
if row is None:
return None
data = {c.name: getattr(row, c.name) for c in DatasheetRow.__table__.columns}
return DatasheetRecord(**data)
def get_all_dataframe(self) -> pd.DataFrame:
"""Return the entire database as a DataFrame."""
return self.search(limit=10_000)
def count(self) -> int:
"""Return total number of records."""
with self.SessionLocal() as session:
return session.query(DatasheetRow).count()
def delete(self, record_id: str) -> bool:
"""Delete a record by ID. Returns True if deleted."""
with self.SessionLocal() as session:
row = session.get(DatasheetRow, record_id)
if row:
session.delete(row)
session.commit()
return True
return False
def get_summary_dataframe(self) -> pd.DataFrame:
"""Return a summary view with key columns only."""
df = self.get_all_dataframe()
if df.empty:
return df
summary_cols = [
"id", "material_name", "trade_name", "manufacturer",
"polymer_family", "grade", "density_g_cm3",
"tensile_strength_mpa", "melting_temperature_c",
"heat_deflection_temperature_c", "applications",
"created_at",
]
available = [c for c in summary_cols if c in df.columns]
return df[available]