Peter Mutwiri committed on
Commit
a0ff994
Β·
1 Parent(s): 318d2d8

feat: enterprise AI stack with hybrid entity detection

Browse files
.env DELETED
@@ -1 +0,0 @@
1
- API_KEYS=dev-analytics-key-123
 
 
app/deps.py CHANGED
@@ -1,10 +1,180 @@
 
1
  import os
 
 
 
 
 
2
  from fastapi import HTTPException, Header
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- API_KEYS = os.getenv("API_KEYS", "").split(",")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- def verify_key(x_api_key: str = Header(None, convert_underscores=True)): # ← accept any case
7
- print(f"[verify_key] received: {x_api_key}, allowed: {API_KEYS}")
8
- if not x_api_key or x_api_key not in API_KEYS:
9
- raise HTTPException(status_code=401, detail="Invalid API key")
10
- return x_api_key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Standard Library ──────────────────────────────────────────────────────────
2
  import os
3
+ from typing import Optional
4
+ import pathlib
5
+
6
+ # ── Third-Party ────────────────────────────────────────────────────────────────
7
+ import duckdb
8
  from fastapi import HTTPException, Header
9
+ from upstash_redis import Redis
10
+
11
+ # ── Configuration Paths ────────────────────────────────────────────────────────
12
+ # Use YOUR existing pattern from app/db.py (multi-tenant)
13
+ DATA_DIR = pathlib.Path("./data/duckdb")
14
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
15
+
16
+ # Vector database for AI embeddings (shared but org-filtered)
17
+ VECTOR_DB_PATH = DATA_DIR / "vectors.duckdb"
18
+
19
# ── Secrets Management ─────────────────────────────────────────────────────────
def get_secret(name: str, required: bool = True) -> Optional[str]:
    """Fetch the environment variable *name*.

    Returns the raw value, or None when the variable is absent and not
    required. A missing or blank required secret raises ValueError
    immediately so misconfiguration surfaces at import time rather than
    mid-request.
    """
    value = os.getenv(name)
    missing = value is None or not value.strip()
    if required and missing:
        raise ValueError(f"πŸ”΄ CRITICAL: Required secret '{name}' not found in HF environment")
    return value
29
+
30
# API Keys (comma-separated for multiple Vercel projects).
# get_secret("API_KEYS") is required, so it either returns a value or raises —
# the old `... if get_secret("API_KEYS") else []` hit the environment twice and
# its else-branch was unreachable. Blank entries (trailing commas, stray
# whitespace) are dropped so an empty header can never match a key.
API_KEYS = [key.strip() for key in get_secret("API_KEYS").split(",") if key.strip()]

# Upstash Redis Bridge (required for Vercel ↔ HF communication)
REDIS_URL = get_secret("UPSTASH_REDIS_REST_URL")
REDIS_TOKEN = get_secret("UPSTASH_REDIS_REST_TOKEN")

# Hugging Face Token (read-only, for model download)
HF_API_TOKEN = get_secret("HF_API_TOKEN", required=False)

# QStash Token (optional, for advanced queue features)
QSTASH_TOKEN = get_secret("QSTASH_TOKEN", required=False)
42
+
43
# ── Singleton Database Connections ──────────────────────────────────────────────
_org_db_connections = {}
_vector_db_conn = None

def get_duckdb(org_id: str):
    """Return the cached multi-tenant DuckDB connection for *org_id*.

    Each org gets an isolated file: ./data/duckdb/{org_id}.duckdb. The
    connection is created on first request and reused afterwards.
    """
    cached = _org_db_connections.get(org_id)
    if cached is not None:
        return cached

    conn = duckdb.connect(str(DATA_DIR / f"{org_id}.duckdb"), read_only=False)

    # Make sure both namespaces exist before any query runs against them.
    conn.execute("CREATE SCHEMA IF NOT EXISTS main")
    conn.execute("CREATE SCHEMA IF NOT EXISTS vector_store")

    # The vector-similarity extension is best-effort: queries that don't
    # need VSS still work when the extension can't be installed.
    try:
        conn.execute("INSTALL vss;")
        conn.execute("LOAD vss;")
    except Exception as e:
        print(f"⚠️ VSS extension warning (non-critical): {e}")

    _org_db_connections[org_id] = conn
    return conn
70
+
71
def get_vector_db():
    """
    Shared vector database for AI embeddings.
    A single file serves all orgs; isolation happens in queries
    (WHERE org_id = ?).

    Fix: the global is assigned only after initialization succeeds — the old
    version set `_vector_db_conn` before running the setup statements, so a
    failure left a half-initialized connection cached for every later caller.
    """
    global _vector_db_conn
    if _vector_db_conn is not None:
        return _vector_db_conn

    conn = duckdb.connect(str(VECTOR_DB_PATH), read_only=False)

    # Vector search is mandatory here (the embeddings table relies on
    # FLOAT[384] similarity search); failures propagate to the caller.
    conn.execute("INSTALL vss;")
    conn.execute("LOAD vss;")

    # Create schema and table for cached embeddings.
    conn.execute("CREATE SCHEMA IF NOT EXISTS vector_store")
    conn.execute("""
        CREATE TABLE IF NOT EXISTS vector_store.embeddings (
            id VARCHAR PRIMARY KEY,
            org_id VARCHAR NOT NULL,
            content TEXT,
            embedding FLOAT[384],
            entity_type VARCHAR,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)

    # Performance index for org-filtered searches — best-effort only.
    try:
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_org_entity
            ON vector_store.embeddings (org_id, entity_type)
        """)
    except Exception as e:
        print(f"⚠️ Index creation warning: {e}")

    _vector_db_conn = conn
    return _vector_db_conn
107
+
108
# ── Redis Singleton ────────────────────────────────────────────────────────────
_redis_client = None

def get_redis():
    """
    Upstash Redis client (singleton) for the Vercel bridge.

    Pings once on first construction and raises RuntimeError if unreachable.
    Fix: the client is cached only after the ping succeeds — the old version
    assigned the global first, so after a failed ping every later call
    returned the broken client without re-checking.
    """
    global _redis_client
    if _redis_client is None:
        client = Redis(url=REDIS_URL, token=REDIS_TOKEN)

        # Fail loudly at first use rather than on a later request.
        try:
            client.ping()
            print("βœ… Redis bridge connected")
        except Exception as e:
            raise RuntimeError(f"πŸ”΄ Redis connection failed: {e}")

        _redis_client = client

    return _redis_client
127
 
128
# ── API Security Dependency ────────────────────────────────────────────────────
def verify_api_key(x_api_key: str = Header(..., alias="X-API-KEY")):
    """
    FastAPI dependency guarding Vercel-facing endpoints.

    Responds 500 when no keys are configured, 401 on a bad key, and returns
    the validated key so handlers can log which client called.
    """
    if not API_KEYS:
        raise HTTPException(
            status_code=500,
            detail="πŸ”΄ API_KEYS not configured in HF environment"
        )

    # Constant-time comparison: `x_api_key not in API_KEYS` short-circuits on
    # the first differing byte and can leak key prefixes via timing.
    import secrets
    if not any(secrets.compare_digest(x_api_key, key) for key in API_KEYS):
        raise HTTPException(
            status_code=401,
            detail="❌ Invalid API key"
        )

    return x_api_key
147
 
148
# ── Health Check Utilities ─────────────────────────────────────────────────────
def check_all_services():
    """
    Comprehensive health probe used by the /health endpoints.

    Returns a dict mapping service name -> human-readable status string
    ("βœ… connected" or "❌ <error>").
    """
    statuses = {}

    # Each probe raises on failure; insertion order fixes the report order.
    probes = {
        "duckdb": lambda: get_duckdb("health_check").execute("SELECT 1"),
        "vector_db": lambda: get_vector_db().execute("SELECT 1"),
        "redis": lambda: get_redis().ping(),
    }

    for service, probe in probes.items():
        try:
            probe()
            statuses[service] = "βœ… connected"
        except Exception as e:
            statuses[service] = f"❌ {e}"

    return statuses
app/hybrid_entity_detector.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/hybrid_entity_detector.py
2
+ from typing import Tuple
3
+ import pandas as pd
4
+ from app.entity_detector import detect_entity_type as rule_based_detect
5
+ from app.service.ai_service import ai_service
6
+
7
def hybrid_detect_entity_type(org_id: str, df: pd.DataFrame, filename: str) -> Tuple[str, float, bool]:
    """
    Hybrid detection: rule-based first (fast), LLM fallback (accurate).

    Returns (entity_type, confidence, is_confident); is_confident is True
    when the rule-based score clears 0.75 or the LLM reports a strictly
    higher confidence than the rules did.
    """
    # Fast path: the deterministic rule-based detector.
    entity_type, confidence = rule_based_detect(df)
    if confidence > 0.75:
        return entity_type, confidence, True

    # Ambiguous case: consult the LLM, but never let its failure (or a
    # malformed result) break ingestion — fall back to the rule verdict.
    try:
        llm_result = ai_service.detect_entity_type(org_id, list(df.columns), filename)
        if llm_result["confidence"] > confidence:
            return llm_result["entity_type"], llm_result["confidence"], True
        return entity_type, confidence, False
    except Exception as e:
        print(f"[hybrid] LLM fallback failed: {e}, using rule-based")
        return entity_type, confidence, False
app/main.py CHANGED
@@ -1,64 +1,254 @@
1
- from fastapi import FastAPI, Depends
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from fastapi.encoders import jsonable_encoder
4
  from fastapi.responses import JSONResponse
5
- from app.routers import ingress, reports, flags, datasources, scheduler, run, health, socket
6
- from app.tasks.scheduler import start_scheduler
7
- from app.deps import verify_key
8
- from contextlib import asynccontextmanager
9
- import os
10
 
11
- # ---------- lifespan ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @asynccontextmanager
13
  async def lifespan(app: FastAPI):
14
- start_scheduler()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  yield
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # ---------- app init ----------
18
  app = FastAPI(
19
  title="MutSyncHub Analytics Engine",
20
- version="2.2",
21
- lifespan=lifespan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  )
23
 
24
- @app.get("/")
25
- def read_root():
26
- return {"status": "ok", "service": "analytics-engine"}
27
- # ---------- Socket.IO Mount ----------
28
- app.mount("/socket.io", socket.socket_app)
29
-
30
- # ---------- Middleware (fix order) ----------
31
  @app.middleware("http")
32
- async def serialize_all_responses(request, call_next):
33
- """Ensure all responses are safely JSON-serializable."""
 
 
 
 
 
 
34
  response = await call_next(request)
35
- if isinstance(response, dict):
36
- return JSONResponse(content=jsonable_encoder(response))
 
 
 
 
 
 
 
 
 
 
37
  return response
38
 
39
- # ---------- CORS Configuration ----------
40
- origins = [
41
- "https://mut-sync-hub.vercel.app", # live frontend
42
- "http://localhost:3000", # local dev
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  ]
 
44
  app.add_middleware(
45
  CORSMiddleware,
46
- allow_origins=origins,
47
  allow_credentials=True,
48
- allow_methods=["*"],
49
  allow_headers=["*"],
 
 
50
  )
51
 
52
- # ---------- Routers ----------
53
- app.include_router(health.router) # public route (no key)
54
- app.include_router(datasources.router, dependencies=[Depends(verify_key)])
55
- app.include_router(reports.router, dependencies=[Depends(verify_key)])
56
- app.include_router(flags.router, dependencies=[Depends(verify_key)])
57
- app.include_router(scheduler.router, dependencies=[Depends(verify_key)])
58
- app.include_router(run.router, dependencies=[Depends(verify_key)])
59
- app.include_router(socket.router)
60
-
61
- # ---------- Public Health Endpoint ----------
62
- @app.get("/health")
63
- def health_check():
64
- return {"status": "ok", "service": "analytics-engine"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/main.py – ENTERPRISE ANALYTICS ENGINE v3.0
2
+ """
3
+ MutSyncHub Analytics Engine
4
+ Enterprise-grade AI analytics platform with zero-cost inference
5
+ """
6
+
7
+ # ─── Standard Library ─────────────────────────────────────────────────────────
8
+ import os
9
+ import time
10
+ import uuid
11
+ import logging
12
+
13
+ # ─── Third-Party ──────────────────────────────────────────────────────────────
14
+ from fastapi import FastAPI, Depends, HTTPException, Request
15
  from fastapi.middleware.cors import CORSMiddleware
 
16
  from fastapi.responses import JSONResponse
 
 
 
 
 
17
 
18
+ # ─── Router Imports ───────────────────────────────────────────────────────────
19
+ from app.routers import (
20
+ health, # Health & monitoring
21
+ datasources, # Data ingestion
22
+ reports, # Report generation
23
+ flags, # Feature flags
24
+ scheduler, # Background jobs
25
+ run, # Analytics execution
26
+ ai, # AI endpoints (NEW)
27
+ )
28
+
29
+ # ─── Dependencies ─────────────────────────────────────────────────────────────
30
+ from app.deps import verify_api_key, check_all_services
31
+
32
+ # ─── Logger Configuration ───────────────────────────────────────────────────────
33
+ logging.basicConfig(
34
+ level=logging.INFO,
35
+ format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
36
+ datefmt="%Y-%m-%d %H:%M:%S"
37
+ )
38
+ logger = logging.getLogger(__name__)
39
+
40
+ # ─── Lifespan Management ───────────────────────────────────────────────────────
41
  @asynccontextmanager
42
  async def lifespan(app: FastAPI):
43
+ """
44
+ Enterprise startup/shutdown sequence with health validation.
45
+ """
46
+ # ─── Startup ───────────────────────────────────────────────────────────────
47
+ logger.info("=" * 60)
48
+ logger.info("πŸš€ ANALYTICS ENGINE v3.0 - STARTUP SEQUENCE")
49
+ logger.info("=" * 60)
50
+
51
+ app.state.instance_id = f"engine-{uuid.uuid4().hex[:8]}"
52
+ logger.info(f"Instance ID: {app.state.instance_id}")
53
+
54
+ # Validate service health on boot
55
+ try:
56
+ services = check_all_services()
57
+ healthy = [k for k, v in services.items() if "βœ…" in str(v)]
58
+ unhealthy = [k for k, v in services.items() if "❌" in str(v)]
59
+
60
+ logger.info(f"βœ… Healthy: {len(healthy)} services")
61
+ for svc in healthy:
62
+ logger.info(f" β†’ {svc}: {services[svc]}")
63
+
64
+ if unhealthy:
65
+ logger.warning(f"⚠️ Unhealthy: {len(unhealthy)} services")
66
+ for svc in unhealthy:
67
+ logger.warning(f" β†’ {svc}: {services[svc]}")
68
+
69
+ except Exception as e:
70
+ logger.error(f"πŸ”΄ Startup health check failed: {e}")
71
+
72
+ logger.info("βœ… Startup sequence complete")
73
  yield
74
+
75
+ # ─── Shutdown ──────────────────────────────────────────────────────────────
76
+ logger.info("=" * 60)
77
+ logger.info("πŸ›‘ ANALYTICS ENGINE - SHUTDOWN SEQUENCE")
78
+ logger.info("=" * 60)
79
+
80
+ # Close all database connections
81
+ from app.deps import _org_db_connections, _vector_db_conn
82
+
83
+ if _org_db_connections:
84
+ for org_id, conn in _org_db_connections.items():
85
+ try:
86
+ conn.close()
87
+ logger.info(f" β†’ Closed DB: {org_id}")
88
+ except:
89
+ pass
90
+
91
+ if _vector_db_conn:
92
+ try:
93
+ _vector_db_conn.close()
94
+ logger.info(" β†’ Closed Vector DB")
95
+ except:
96
+ pass
97
+
98
+ logger.info("βœ… Shutdown complete")
99
 
100
# ─── FastAPI Application ───────────────────────────────────────────────────────
# Long-form API description lives in a module constant so the constructor
# call below stays readable.
_API_DESCRIPTION = """Enterprise-grade AI analytics engine with:

β€’ Hybrid entity detection (Rule-based + LLM)
β€’ Vector similarity search (DuckDB VSS)
β€’ Zero external API costs (Local Mistral-7B)
β€’ Multi-tenant data isolation
β€’ Redis-backed async processing

**πŸ”’ All endpoints require X-API-KEY header except /health**"""

app = FastAPI(
    title="MutSyncHub Analytics Engine",
    version="3.0.0",
    description=_API_DESCRIPTION,
    lifespan=lifespan,
    # Docs are served under /api/* so they sit behind the same path prefix
    # as the routers.
    docs_url="/api/docs",
    redoc_url="/api/redoc",
    openapi_url="/api/openapi.json",
    contact={
        "name": "MutSyncHub Enterprise",
        "email": "enterprise@mutsynchub.com",
    },
    license_info={
        "name": "Enterprise License",
    },
)
  )
125
 
126
# ─── Request ID Middleware ─────────────────────────────────────────────────────
@app.middleware("http")
async def add_request_tracking(request: Request, call_next):
    """
    Attach a request ID and timing headers to every response for
    observability, and emit one access-log line per request.
    """
    request_id = f"req-{uuid.uuid4().hex[:12]}"
    request.state.request_id = request_id

    # perf_counter is monotonic; time.time() can jump under NTP adjustments
    # and yield negative or garbage durations.
    start_time = time.perf_counter()
    response = await call_next(request)
    process_time = time.perf_counter() - start_time

    # Add headers
    response.headers["X-Request-ID"] = request_id
    response.headers["X-Response-Time"] = f"{process_time:.3f}s"

    # Log
    logger.info(
        f"{request.method} {request.url.path} | {response.status_code} "
        f"| {process_time:.3f}s | {request_id}"
    )

    return response
150
 
151
# ─── Root Endpoint ─────────────────────────────────────────────────────────────
@app.get("/", tags=["root"])
def read_root():
    """
    Service information and discovery.
    """
    return {
        "status": "operational",
        "service": "MutSyncHub Analytics Engine",
        "version": "3.0.0",
        "mode": "production" if os.getenv("SPACE_ID") else "development",
        # instance_id is assigned in lifespan(); fall back gracefully when the
        # app runs without lifespan (e.g. some test clients), where the old
        # attribute access raised AttributeError and 500'd the root route.
        "instance_id": getattr(app.state, "instance_id", "uninitialized"),
        "endpoints": {
            "docs": "/api/docs",
            "health": "/api/health/detailed",
            "datasources": "/api/datasources",
            "ai": "/api/ai",
        },
        "features": [
            "Hybrid entity detection",
            "Vector similarity search",
            "Multi-tenant isolation",
            "Zero-cost LLM inference",
            "Redis-backed processing"
        ]
    }
177
+
178
# ─── CORS Configuration ────────────────────────────────────────────────────────
# Explicit allow-list: credentials are enabled, so wildcard origins are not
# an option.
ALLOWED_ORIGINS = [
    "https://mut-sync-hub.vercel.app",
    "http://localhost:3000",
    "https://studio.huggingface.co",
]

# Tracking headers set by the request middleware must be exposed for browser
# clients to read them; preflight responses are cacheable for one hour.
app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["X-Request-ID", "X-Response-Time"],
    max_age=3600,
)
194
 
195
# ─── Global Error Handler ──────────────────────────────────────────────────────
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """
    Catch all uncaught exceptions and return a safe JSON error response.
    """
    # The tracking middleware may not have run yet (e.g. errors raised inside
    # other middleware), so don't assume request.state.request_id exists —
    # the old handler itself crashed with AttributeError in that case.
    request_id = getattr(request.state, "request_id", "unknown")

    logger.error(
        f"πŸ”΄ Unhandled error | Path: {request.url.path} | "
        f"Request ID: {request_id} | Error: {str(exc)}",
        exc_info=True
    )

    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            "message": "An unexpected error occurred. Check server logs.",
            "request_id": request_id,
            "timestamp": time.time()
        }
    )
216
+
217
# ─── Router Registration ───────────────────────────────────────────────────────
# Public routers (no authentication)
PUBLIC_ROUTERS = [
    (health.router, "/api"),
]

# Protected routers (require X-API-KEY)
PROTECTED_ROUTERS = [
    (datasources.router, "/api/datasources"),
    (reports.router, "/api/reports"),
    (flags.router, "/api/flags"),
    (scheduler.router, "/api/scheduler"),
    (run.router, "/api/run"),
    (ai.router, "/api/ai"),
]

for public_router, public_prefix in PUBLIC_ROUTERS:
    app.include_router(public_router, prefix=public_prefix)

for protected_router, protected_prefix in PROTECTED_ROUTERS:
    # Tag by the final path segment so OpenAPI groups endpoints per router.
    app.include_router(
        protected_router,
        prefix=protected_prefix,
        dependencies=[Depends(verify_api_key)],
        tags=[protected_prefix.split("/")[-1].title()],
    )

# Registration summary — only printed when this module is executed directly
# (under uvicorn __name__ != "__main__", so this block stays silent).
if __name__ == "__main__":
    logger.info("=" * 60)
    logger.info("πŸ“Š ROUTER REGISTRATION SUMMARY")
    logger.info("=" * 60)
    for _router, _prefix in PROTECTED_ROUTERS:
        logger.info(f"πŸ”’ {_prefix:30} β†’ PROTECTED")
    for _router, _prefix in PUBLIC_ROUTERS:
        logger.info(f"πŸ”“ {_prefix:30} β†’ PUBLIC")
    logger.info("=" * 60)
app/mapper.py CHANGED
@@ -6,6 +6,8 @@ import pandas as pd
6
  from datetime import datetime, timedelta
7
  from app.db import get_conn, ensure_raw_table
8
  from app.utils.detect_industry import _ALIAS, detect_industry
 
 
9
 
10
  # ---------------------- Canonical schema base ---------------------- #
11
  CANONICAL = {
@@ -28,13 +30,10 @@ def map_pandas_to_duck(col: str, series: pd.Series) -> str:
28
  if pd.api.types.is_datetime64_any_dtype(series): return "TIMESTAMP"
29
  return "VARCHAR"
30
 
31
- # ---------- INDUSTRY DETECTION (uses centralized detect_industry) ---------- #
32
- def ensure_canonical_table(duck: duckdb.DuckDBPyConnection, df: pd.DataFrame) -> str:
33
- """
34
- Creates single canonical table and adds missing columns dynamically.
35
- BULLETPROOF: Handles int column names, missing columns, race conditions.
36
- """
37
- table_name = "main.canonical"
38
 
39
  # Create base table if doesn't exist
40
  duck.execute(f"""
@@ -81,7 +80,7 @@ def save_dynamic_aliases() -> None:
81
  json.dump(CANONICAL, f, indent=2)
82
 
83
  # ---------- Main Canonify Function (ENTERPRISE-GRADE) ---------- #
84
- def canonify_df(org_id: str, hours_window: int = 24) -> tuple[pd.DataFrame, str, float]:
85
  """
86
  Enterprise ingestion pipeline:
87
  - Accepts ANY raw data shape
@@ -197,15 +196,17 @@ def canonify_df(org_id: str, hours_window: int = 24) -> tuple[pd.DataFrame, str,
197
  except Exception as e:
198
  print(f"[canonify] Type conversion warning (non-critical): {e}")
199
 
200
- # 6) βœ… Industry detection
201
- industry, confidence = detect_industry(df)
202
- print(f"[canonify] 🎯 Industry: {industry} ({confidence:.1%} confidence)")
 
203
 
204
  # 7) Dynamic schema evolution
205
  os.makedirs("./db", exist_ok=True)
206
  duck = duckdb.connect(f"./db/{org_id}.duckdb")
207
 
208
- table_name = ensure_canonical_table(duck, df)
 
209
 
210
  # βœ… SAFE INSERT: Match columns explicitly
211
  if not df.empty:
 
6
  from datetime import datetime, timedelta
7
  from app.db import get_conn, ensure_raw_table
8
  from app.utils.detect_industry import _ALIAS, detect_industry
9
+ # app/mapper.py (add line 1)
10
+ from app.hybrid_entity_detector import hybrid_detect_entity_type
11
 
12
  # ---------------------- Canonical schema base ---------------------- #
13
  CANONICAL = {
 
30
  if pd.api.types.is_datetime64_any_dtype(series): return "TIMESTAMP"
31
  return "VARCHAR"
32
 
33
+ # ---------- entity detection(uses ai to detect entity from the data) ---------- #
34
+ def ensure_canonical_table(duck: duckdb.DuckDBPyConnection, df: pd.DataFrame, entity_type: str) -> str:
35
+ """Creates entity-specific table: main.sales_canonical, main.inventory_canonical, etc."""
36
+ table_name = f"main.{entity_type}_canonical"
 
 
 
37
 
38
  # Create base table if doesn't exist
39
  duck.execute(f"""
 
80
  json.dump(CANONICAL, f, indent=2)
81
 
82
  # ---------- Main Canonify Function (ENTERPRISE-GRADE) ---------- #
83
+ def canonify_df(org_id: str, filename: str, hours_window: int = 24) -> tuple[pd.DataFrame, str, float]:
84
  """
85
  Enterprise ingestion pipeline:
86
  - Accepts ANY raw data shape
 
196
  except Exception as e:
197
  print(f"[canonify] Type conversion warning (non-critical): {e}")
198
 
199
+ # 6) βœ… Hybrid entity detection (rule-based + LLM fallback)
200
+ entity_type, confidence, is_confident = hybrid_detect_entity_type(org_id, df, filename)
201
+ print(f"[canonify] 🎯 Entity: {entity_type} ({confidence:.1%} confidence, AI: {not is_confident})")
202
+ industry = entity_type
203
 
204
  # 7) Dynamic schema evolution
205
  os.makedirs("./db", exist_ok=True)
206
  duck = duckdb.connect(f"./db/{org_id}.duckdb")
207
 
208
+ # 7) βœ… Entity-specific canonical table
209
+ table_name = ensure_canonical_table(duck, df, entity_type)
210
 
211
  # βœ… SAFE INSERT: Match columns explicitly
212
  if not df.empty:
app/redis_client.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app/redis_client.py
"""Module-level export of the shared Redis client."""
from app.deps import get_redis

# Export the singleton instance. get_redis() already validates the
# connection on first construction (pings, prints "βœ… Redis bridge
# connected", and raises RuntimeError when unreachable), so the old
# second ping/print here was a redundant duplicate of that check and
# produced double log lines on every import.
redis = get_redis()
app/routers/health.py CHANGED
@@ -1,7 +1,98 @@
1
- from fastapi import APIRouter
 
 
 
 
2
 
3
  router = APIRouter(tags=["health"])
4
 
5
  @router.get("/health")
6
- def health():
7
- return {"status": "ok", "service": "analytics-engine"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/routers/health.py – ENTERPRISE HEALTH CHECKS
2
+ from fastapi import APIRouter, HTTPException, Depends
3
+ from app.deps import check_all_services, get_redis, get_vector_db, get_duckdb
4
+ import os
5
+ import time
6
 
7
router = APIRouter(tags=["health"])

@router.get("/health")
def health_check():
    """Liveness-style basic probe used by load balancers.

    Always answers 200 with a static payload; no dependencies are touched.
    """
    return {"status": "ok", "service": "analytics-engine"}
16
+
17
+ @router.get("/health/detailed")
18
+ def health_detailed():
19
+ """
20
+ Comprehensive health check for all services.
21
+ Returns detailed status of each component.
22
+ """
23
+ start_time = time.time()
24
+ statuses = check_all_services()
25
+
26
+ # Determine overall health
27
+ all_healthy = all("βœ…" in str(status) for status in statuses.values())
28
+ http_status = 200 if all_healthy else 503
29
+
30
+ return {
31
+ "status": "healthy" if all_healthy else "unhealthy",
32
+ "services": statuses,
33
+ "environment": "production" if os.getenv("SPACE_ID") else "development",
34
+ "uptime_seconds": time.time() - start_time,
35
+ "timestamp": time.time()
36
+ }
37
+
38
+ @router.get("/health/ready")
39
+ def health_ready():
40
+ """
41
+ Kubernetes-style readiness probe.
42
+ Returns 200 if ready to serve traffic.
43
+ """
44
+ try:
45
+ # Quick smoke test: Can we connect to core services?
46
+ redis = get_redis()
47
+ redis.ping()
48
+
49
+ # Test DuckDB with a dummy org
50
+ conn = get_duckdb("health_check")
51
+ conn.execute("SELECT 1")
52
+
53
+ return {"status": "ready"}
54
+ except Exception as e:
55
+ raise HTTPException(
56
+ status_code=503,
57
+ detail=f"Not ready: {str(e)}"
58
+ )
59
+
60
+ @router.get("/health/live")
61
+ def health_live():
62
+ """
63
+ Kubernetes-style liveness probe.
64
+ Returns 200 if service is alive (doesn't check dependencies).
65
+ """
66
+ return {"status": "alive"}
67
+
68
@router.post("/health/reload")
def health_reload(_: str = Depends(check_all_services)):
    """
    Clear cached service connections so they are rebuilt on next use.

    NOTE(review): the dependency runs check_all_services, which is a health
    probe — not an API-key check despite the original comment; confirm
    whether this route should depend on verify_api_key instead.
    """
    # Rebind through the module object. The old version did
    # `from app.deps import _vector_db_conn` and assigned None to it, which
    # only rebound the local name — the cached singletons were never reset.
    import app.deps as deps

    deps._org_db_connections.clear()
    deps._vector_db_conn = None
    deps._redis_client = None

    return {"status": "reloaded", "message": "Connections cleared"}
82
+
83
+ @router.get("/health/metrics")
84
+ def health_metrics():
85
+ """
86
+ Performance metrics for monitoring.
87
+ """
88
+ try:
89
+ import psutil
90
+
91
+ return {
92
+ "cpu_percent": psutil.cpu_percent(),
93
+ "memory_mb": psutil.virtual_memory().used // (1024 * 1024),
94
+ "disk_gb": psutil.disk_usage("/").free // (1024**3),
95
+ "connections": len(_org_db_connections) if '_org_db_connections' in globals() else 0
96
+ }
97
+ except ImportError:
98
+ return {"error": "psutil not installed"}
app/service/ai_service.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/service/ai_service.py
2
+ import json
3
+ from app.deps import get_vector_db
4
+ from app.service.llm_service import llm_service
5
+ from app.service.embedding_service import embedder
6
+
7
class AIService:
    """Org-scoped AI helpers: entity detection, SQL generation, vector search."""

    def __init__(self):
        self.vector_db = get_vector_db()
        self.llm = llm_service
        self.embedder = embedder

    def detect_entity_type(self, org_id: str, columns: list[str], filename: str) -> dict:
        """Detect entity type - per-org cache"""
        columns_str = ",".join(columns)

        # Check vector cache for this org
        cached = self.vector_db.execute("""
            SELECT entity_type FROM vector_store.embeddings
            WHERE org_id = ? AND content = ?
            ORDER BY created_at DESC LIMIT 1
        """, [org_id, columns_str]).fetchone()

        if cached:
            return {"entity_type": cached[0], "confidence": 0.99, "cached": True}

        # AI detection — interpolate the real filename; the old prompt
        # hardcoded a placeholder and left the `filename` parameter unused.
        prompt = f"""Columns: {columns_str}\nFilename: {filename}\nClassify as: sales,inventory,customer,product. JSON: {{"entity_type":"...","confidence":0.95}}"""
        response = self.llm.generate(prompt, max_tokens=100)
        # NOTE(review): raises ValueError on non-JSON LLM output; callers
        # (hybrid detector) already treat any exception as a soft failure.
        result = json.loads(response)

        # Cache for this org. `id` is a NOT NULL primary key, so it must be
        # supplied explicitly — the old INSERT omitted it and the statement
        # failed on every cache write.
        import uuid
        embedding = self.embedder.generate(columns_str)
        self.vector_db.execute("""
            INSERT INTO vector_store.embeddings (id, org_id, content, embedding, entity_type)
            VALUES (?, ?, ?, ?, ?)
        """, [uuid.uuid4().hex, org_id, columns_str, embedding, result["entity_type"]])

        return result

    def generate_sql(self, org_id: str, question: str, entity_type: str, schema: dict) -> str:
        """Generate SQL for specific org"""
        prompt = f"Org: {org_id}\nSchema: {json.dumps(schema)}\nEntity: {entity_type}\nQuestion: {question}\nDuckDB SQL only:"
        sql = self.llm.generate(prompt, max_tokens=300)
        return sql.strip()

    def similarity_search(self, org_id: str, query: str, entity_type: str, top_k: int = 5) -> list[dict]:
        """Search within org's vector history"""
        query_vector = self.embedder.generate(query)

        results = self.vector_db.execute("""
            SELECT id, content, entity_type, array_cosine_similarity(embedding, ?::FLOAT[384]) as score
            FROM vector_store.embeddings
            WHERE org_id = ? AND entity_type = ?
            ORDER BY score DESC
            LIMIT ?
        """, [query_vector, org_id, entity_type, top_k]).fetchall()

        return [{"id": r[0], "content": r[1], "entity_type": r[2], "score": r[3]} for r in results]

# Module-level singleton (connects to the vector DB at import time).
ai_service = AIService()
app/service/embedding_service.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/service/embedding_service.py
2
+ import requests
3
+ from app.deps import HF_API_TOKEN
4
+
5
class EmbeddingService:
    """Sentence embeddings via the HF Inference API with a local fallback."""

    def __init__(self):
        self.api_url = "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2"
        self.headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
        # Lazily-created SentenceTransformer, cached for the fallback path.
        self._local_model = None

    def generate(self, text: str) -> list[float]:
        """Generate embedding - uses HF free tier (10k/day)"""
        try:
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json={"inputs": text, "options": {"wait_for_model": True}},
                timeout=30
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Fallback to local if API fails
            print(f"HF API failed, using local fallback: {e}")
            return self._local_fallback(text)

    def _local_fallback(self, text: str) -> list[float]:
        """Local embedding generation (slower but reliable).

        The model is loaded once and cached on the instance — the old
        version re-loaded all-MiniLM-L6-v2 from disk on every call.
        """
        if self._local_model is None:
            from sentence_transformers import SentenceTransformer
            self._local_model = SentenceTransformer('all-MiniLM-L6-v2')
        return self._local_model.encode(text).tolist()

embedder = EmbeddingService()
app/service/llm_service.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/service/llm_service.py
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
+ from app.deps import HF_API_TOKEN
5
+
6
class LocalLLMService:
    """Local Mistral-7B-Instruct wrapper (zero external inference cost).

    Model and tokenizer are loaded once at import time via the module-level
    singleton and placed on GPU automatically (device_map="auto").
    """

    def __init__(self):
        # FREE, permissive license, fits in T4 GPU
        self.model_id = "mistralai/Mistral-7B-Instruct-v0.3"

        # NOTE(review): trust_remote_code executes code from the model repo —
        # confirm this is intended for this model.
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_id,
            token=HF_API_TOKEN,
            trust_remote_code=True,
        )
        self.tokenizer.pad_token = self.tokenizer.eos_token

        # Load to GPU automatically
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            token=HF_API_TOKEN,
            torch_dtype=torch.float16,
            device_map="auto",
        )

        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device_map="auto",
        )

    def generate(self, prompt: str, max_tokens: int = 500, temperature: float = 0.3) -> str:
        """Run one chat-style completion and return the assistant's text."""
        chat = [
            {"role": "system", "content": "You are a data analytics assistant. Respond with valid JSON only."},
            {"role": "user", "content": prompt},
        ]

        rendered = self.tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=True,
        )

        generations = self.pipe(
            rendered,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
        )

        # The pipeline echoes the prompt; keep only what follows [/INST].
        text = generations[0]["generated_text"]
        if "[/INST]" in text:
            return text.split("[/INST]")[-1].strip()
        return text.strip()

# Singleton instance
llm_service = LocalLLMService()
app/tasks/worker.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/tasks/worker.py – ENTERPRISE GRADE
2
+ import json
3
+ import time
4
+ import signal
5
+ import sys
6
+ import traceback
7
+ from typing import Dict, Any, Callable
8
+ from app.redis_client import redis
9
+ from app.service.ai_service import ai_service
10
+ from app.deps import get_duckdb
11
+
12
# ── Graceful Shutdown ──────────────────────────────────────────────────────────
def shutdown(signum, frame):
    """Signal handler: announce the stop and exit cleanly with status 0."""
    print("\n🛑 Worker shutting down gracefully...")
    sys.exit(0)

# Register the same handler for interactive (Ctrl+C) and supervisor stops.
for _sig in (signal.SIGINT, signal.SIGTERM):
    signal.signal(_sig, shutdown)
19
+
20
# ── Task Handler Registry ─────────────────────────────────────────────────────
# Every handler takes org_id as its first positional argument; the remaining
# task args are forwarded as keyword arguments.
def _detect_entity_type(org_id, **args):
    return ai_service.detect_entity_type(org_id, **args)

def _generate_sql(org_id, **args):
    return ai_service.generate_sql(org_id, **args)

def _generate_insights(org_id, **args):
    return ai_service.generate_insights(org_id, **args)

def _similarity_search(org_id, **args):
    return ai_service.similarity_search(org_id, **args)

TASK_HANDLERS: Dict[str, Callable] = {
    "detect_entity_type": _detect_entity_type,
    "generate_sql": _generate_sql,
    "generate_insights": _generate_insights,
    "similarity_search": _similarity_search,
    # Mapper integration — lambdas defer lookup of the wrappers defined below.
    "canonify_df": lambda org_id, **args: canonify_df_with_entity(org_id, **args),
    "execute_sql": lambda org_id, **args: execute_org_sql(org_id, **args),
}
32
+
33
# ── Wrapper for Legacy Functions ──────────────────────────────────────────────
def canonify_df_with_entity(org_id: str, filename: str, hours_window: int = 24):
    """Bridge to the existing ``app.mapper.canonify_df``.

    The mapper now performs hybrid entity detection internally; this wrapper
    only adapts it to the worker's (org_id-first) handler signature.
    """
    # Deferred import, resolved at call time rather than at module load.
    from app.mapper import canonify_df as _canonify
    return _canonify(org_id, filename, hours_window)
39
+
40
def execute_org_sql(org_id: str, sql: str):
    """Execute a read-only SQL query against the given org's DuckDB.

    Args:
        org_id: tenant whose database connection is used.
        sql: the query text; only SELECT statements are accepted.

    Returns:
        All result rows from ``fetchall()``.

    Raises:
        ValueError: if the statement is not a SELECT.
    """
    conn = get_duckdb(org_id)

    # Strip a trailing semicolon so an appended LIMIT stays syntactically valid.
    stmt = sql.strip().rstrip(";")
    upper = stmt.upper()

    # Security: Only allow SELECT queries
    if not upper.startswith("SELECT"):
        raise ValueError("🔒 Only SELECT queries allowed")

    # Add LIMIT 10000 if not present to prevent overload.
    # NOTE(review): naive substring check — a query mentioning "limit" in a
    # literal or identifier also matches; acceptable as a safety cap.
    if "LIMIT" not in upper:
        stmt += " LIMIT 10000"

    # BUG FIX: execute the ORIGINAL (case-preserved) statement. The previous
    # code ran the upper-cased copy, which corrupted case-sensitive string
    # literals and quoted identifiers inside the query.
    return conn.execute(stmt).fetchall()
54
+
55
# ── Task Processing ────────────────────────────────────────────────────────────
def process_task(task_data: Dict[str, Any]):
    """Process a single task with full error handling and logging.

    Expected shape: ``{"id": ..., "function": ..., "args": {"org_id": ...}}``.
    The outcome (success or error) is written to Redis under
    ``python:response:<task_id>`` with a 1-hour TTL for the producer to poll.

    Raises:
        ValueError: if the task lacks an id/function or args lack org_id.
    """
    task_id = task_data.get("id")
    function_name = task_data.get("function")
    args = task_data.get("args", {})

    # ── Validation ─────────────────────────────────────────────────────────────
    if not task_id or not function_name:
        raise ValueError("❌ Invalid task: missing id or function")

    if "org_id" not in args:
        raise ValueError(f"❌ Task {task_id} missing required org_id")

    org_id = args["org_id"]

    # BUG FIX: handlers take org_id as their first positional argument, so it
    # must be removed from the forwarded keyword args — passing it in **args
    # as well raised "got multiple values for argument 'org_id'" on every task.
    handler_args = {k: v for k, v in args.items() if k != "org_id"}

    # ── Handler Execution ──────────────────────────────────────────────────────
    start_time = time.time()
    print(f"🔵 [{org_id}] Processing {function_name} (task: {task_id})")

    try:
        handler = TASK_HANDLERS.get(function_name)
        if not handler:
            raise ValueError(f"Unknown function: {function_name}")

        # Execute handler (org_id is passed explicitly, rest via keywords)
        result = handler(org_id, **handler_args)

        # ── Success Response ───────────────────────────────────────────────────
        duration = time.time() - start_time
        print(f"✅ [{org_id}] {function_name} completed in {duration:.2f}s")

        redis.setex(
            f"python:response:{task_id}",
            3600,  # 1 hour TTL
            json.dumps({
                "status": "success",
                "org_id": org_id,
                "function": function_name,
                "data": result,
                "duration": duration
            })
        )

    except Exception as e:
        # ── Error Response ─────────────────────────────────────────────────────
        duration = time.time() - start_time
        error_msg = f"{type(e).__name__}: {str(e)}"
        print(f"❌ [{org_id}] {function_name} failed after {duration:.2f}s: {error_msg}")
        print(traceback.format_exc())  # Full stack trace for debugging

        redis.setex(
            f"python:response:{task_id}",
            3600,
            json.dumps({
                "status": "error",
                "org_id": org_id,
                "function": function_name,
                "message": error_msg,
                "duration": duration
            })
        )
117
+
118
# ── Main Worker Loop ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("🚀 Python worker listening on Redis queue...")
    print("Press Ctrl+C to stop")

    while True:
        try:
            # Block until a task arrives (timeout=0 waits forever).
            _queue, raw_payload = redis.brpop("python:task_queue", timeout=0)

            # Skip — but log — payloads that are not valid JSON.
            try:
                task = json.loads(raw_payload)
            except json.JSONDecodeError as decode_err:
                print(f"❌ Malformed task JSON: {decode_err}")
                continue

            process_task(task)

        except KeyboardInterrupt:
            print("\nShutting down...")
            break
        except Exception as worker_err:
            # Worker-level failure (Redis connectivity, etc.) — back off.
            print(f"🔴 Worker error: {worker_err}")
            traceback.print_exc()
            time.sleep(5)  # Longer cooldown for worker errors
fly.toml DELETED
@@ -1,23 +0,0 @@
1
- # fly.toml app configuration file generated for mutsynchub on 2025-11-06T14:44:31Z
2
- #
3
- # See https://fly.io/docs/reference/configuration/ for information about how to use this file.
4
- #
5
-
6
- app = 'mutsynchub'
7
- primary_region = 'iad'
8
-
9
- [build]
10
-
11
- [http_service]
12
- internal_port = 8080
13
- force_https = true
14
- auto_stop_machines = 'stop'
15
- auto_start_machines = true
16
- min_machines_running = 0
17
- processes = ['app']
18
-
19
- [[vm]]
20
- memory = '1gb'
21
- cpu_kind = 'shared'
22
- cpus = 1
23
- memory_mb = 1024