| import pandas as pd |
| import json |
|
|
# Module-level cache for the database engine; stays None until get_engine()
# assigns it on its first call.
_engine = None
|
|
def get_engine():
    """Return the shared SQLAlchemy engine, creating it on the first call.

    The engine is stored in the module-level ``_engine`` variable, so later
    calls return the same object without rebuilding the connection string.

    Connection settings are read from the environment variables
    ``INNOTRACK_DB_SERVER``, ``INNOTRACK_DB_NAME``, ``INNOTRACK_DB_USER`` and
    ``INNOTRACK_DB_PASSWORD``; when a variable is unset the previous built-in
    value is used, so existing deployments keep working unchanged.

    Returns:
        The SQLAlchemy engine. A failed connectivity check is only reported
        on stdout; the engine is still cached and returned.
    """
    global _engine
    if _engine is not None:
        return _engine

    # Function-scope imports: SQLAlchemy is only loaded when an engine is
    # actually requested.
    import os
    # ``import urllib`` alone does not guarantee that the ``urllib.parse``
    # submodule is loaded, so import the function explicitly.
    from urllib.parse import quote_plus

    from sqlalchemy import create_engine

    # NOTE(security): the fallback values below are credentials committed to
    # source control. They are kept only for backward compatibility; supply
    # the environment variables instead, then rotate and remove the defaults.
    server = os.environ.get(
        "INNOTRACK_DB_SERVER", "innotrack-sql-server.database.windows.net"
    )
    database = os.environ.get("INNOTRACK_DB_NAME", "InnoTrackDB")
    username = os.environ.get("INNOTRACK_DB_USER", "innotrackadmin")
    password = os.environ.get("INNOTRACK_DB_PASSWORD", "Innotrack@admin233")

    # The raw ODBC connection string is URL-quoted so it can travel inside
    # the ``odbc_connect`` query parameter of the SQLAlchemy URL.
    params = quote_plus(
        f"DRIVER={{ODBC Driver 18 for SQL Server}};"
        f"SERVER={server};"
        f"DATABASE={database};"
        f"UID={username};"
        f"PWD={password};"
        "Encrypt=yes;"
        "TrustServerCertificate=no;"
        "Connection Timeout=30;"
    )
    connection_string = f"mssql+pyodbc:///?odbc_connect={params}"
    _engine = create_engine(connection_string, pool_pre_ping=True)

    # Best-effort connectivity check: a failure is reported but deliberately
    # not raised, so callers decide how to handle an unreachable database.
    try:
        with _engine.connect():
            print("SQL Connected Successfully")
    except Exception as e:
        print("Connection Failed")
        print(e)

    return _engine
|
|
class LazyEngine:
    """Stand-in object that forwards to the engine returned by ``get_engine()``."""

    def __repr__(self):
        """Return the repr of the engine obtained from ``get_engine()``."""
        underlying = get_engine()
        return repr(underlying)

    def __getattr__(self, name):
        """Fetch attribute ``name`` from the engine obtained from ``get_engine()``."""
        underlying = get_engine()
        return getattr(underlying, name)
|
|
# Module-level handle used by the functions below; LazyEngine forwards
# attribute access to get_engine(), so get_engine() is not called here.
engine = LazyEngine()
|
|
|
|
def _parse_features(value):
    """Decode one JSON-encoded ``features`` cell; non-strings pass through."""
    if not isinstance(value, str):
        return value
    try:
        decoded = json.loads(value)
        # A decoded result that is still a string is decoded once more
        # (covers a value that was JSON-encoded twice).
        if isinstance(decoded, str):
            decoded = json.loads(decoded)
    except (ValueError, RecursionError):
        # Unparseable text is replaced by an empty list.
        return []
    return decoded


def _normalise_local_features(value):
    """Convert one ``features`` cell read from parquet into a plain list."""
    if hasattr(value, "tolist"):
        return value.tolist()
    if isinstance(value, (list, tuple, set)):
        return list(value)
    return []


def _local_metadata_candidates():
    """Return the candidate locations of the local ``metadata.parquet`` file."""
    from pathlib import Path

    candidates = []
    try:
        candidates.append(
            Path(__file__).resolve().parents[2] / "models" / "metadata.parquet"
        )
    except (IndexError, NameError):
        # The file sits fewer than three levels deep (or ``__file__`` is not
        # defined); skip this candidate instead of aborting the fallback.
        pass
    candidates.append(Path("models/metadata.parquet"))
    candidates.append(Path("../models/metadata.parquet"))
    return candidates


def load_preprocessed_projects():
    """Load the preprocessed projects as a pandas DataFrame.

    The ``preprocess`` table is read through the module-level ``engine``.
    When a ``features`` column is present, string cells are JSON-decoded and
    unparseable cells become ``[]``.

    If anything in the database path raises, the error is printed and the
    first existing local ``metadata.parquet`` candidate is loaded instead,
    with its ``features`` cells converted to plain lists.

    Returns:
        pandas.DataFrame: the rows from the database, or from the local
        parquet file when the database path failed.

    Raises:
        FileNotFoundError: the database path failed and no local
            ``metadata.parquet`` exists in any candidate location. The
            original database error is attached as ``__cause__``.
    """
    try:
        query = """
        SELECT *
        FROM preprocess
        """

        with engine.connect() as conn:
            df = pd.read_sql(query, conn)

        if "features" in df.columns:
            df["features"] = df["features"].apply(_parse_features)

        return df

    except Exception as e:
        # Deliberate catch-all: any failure on the database path triggers
        # the local-file fallback.
        print(f"Database connection failed, falling back to local metadata file. Error: {e}")
        possible_paths = _local_metadata_candidates()
        for path in possible_paths:
            if path.exists():
                print(f"Loading local metadata from {path}")
                df = pd.read_parquet(path)
                if "features" in df.columns:
                    df["features"] = df["features"].apply(_normalise_local_features)
                return df
        raise FileNotFoundError(f"Could not connect to database and local metadata.parquet was not found. Looked in: {[str(p) for p in possible_paths]}") from e