"""
FastAPI Profiling Service v2 — NVIDIA NIM powered.

Endpoints:
    POST /profile → Profile company + compute score (single pipeline)
    GET  /health  → Service health check

Security:
    Bearer token authentication (shared secret with Node.js orchestration layer)
"""

import logging
import secrets
from contextlib import asynccontextmanager
from typing import Optional

from fastapi import FastAPI, HTTPException, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel

from config import settings
from profiler import generate_profile
from scorer import compute_score
from hallucination_guard import validate_score_grounded

# Unknown LOG_LEVEL values fall back to INFO instead of raising at import time.
logging.basicConfig(level=getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO))
logger = logging.getLogger(__name__)


# ─── Auth ─────────────────────────────────────────────────────

security = HTTPBearer()


def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> bool:
    """Check the bearer token against ``settings.PYTHON_AI_SERVICE_SECRET``.

    Returns:
        True when the presented token matches the configured secret.

    Raises:
        HTTPException: 401 when the token does not match.
    """
    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Both sides are encoded to bytes because
    # ``secrets.compare_digest`` rejects non-ASCII ``str`` arguments.
    # NOTE(review): assumes PYTHON_AI_SERVICE_SECRET is a plain ``str`` — confirm in config.
    presented = credentials.credentials.encode("utf-8")
    expected = settings.PYTHON_AI_SERVICE_SECRET.encode("utf-8")
    if not secrets.compare_digest(presented, expected):
        raise HTTPException(status_code=401, detail="Invalid authentication")
    return True


# ─── Models ───────────────────────────────────────────────────

class CompanyInput(BaseModel):
    """Company payload of a profiling request; only ``name`` is required."""

    id: Optional[str] = None
    name: str
    industry: str = ""
    employee_count: Optional[int] = None
    description: str = ""
    website_text: str = ""
    linkedin_description: str = ""
    tech_stack: list[str] = []
    ai_job_count: int = 0
    pain_signals: list[str] = []
    service_match: Optional[str] = None


class ContactInput(BaseModel):
    """A single contact attached to a profiling request; every field is optional."""

    full_name: str = ""
    email: Optional[str] = None
    email_verified: bool = False
    linkedin_personal_url: Optional[str] = None
    social_profiles: dict = {}


class ProfileRequest(BaseModel):
    """Request body for ``POST /profile``."""

    company: CompanyInput
    contacts: list[ContactInput] = []
    trace_id: str = ""  # echoed back under "meta" in the response


# ─── App ──────────────────────────────────────────────────────

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log startup information before serving and a notice on shutdown."""
    logger.info("🚀 AI Profiling Service v2 starting...")
    logger.info(" NVIDIA NIM: %s", settings.NVIDIA_NIM_BASE_URL)
    logger.info(" Models: GPT OSS → Gemma 3 → LLaMA 70B → LLaMA 8B → Deterministic")
    yield
    logger.info("AI Profiling Service shutting down")


app = FastAPI(
    title="AI Lead Profiling Service",
    version="2.0.0",
    lifespan=lifespan,
)


# ─── Endpoints ────────────────────────────────────────────────

@app.get("/health")
async def health():
    """Return a static health payload: status, version and model identifiers."""
    return {
        "status": "healthy",
        "version": "2.0.0",
        "models": {
            "primary": "nvidia/llama-3.1-nemotron-ultra-253b-v1",
            "secondary": "google/gemma-3-27b-it",
            "tertiary": "meta/llama-3.3-70b-instruct",
            "fast": "meta/llama-3.1-8b-instruct",
        },
    }


@app.post("/profile")
async def profile_company(request: ProfileRequest, _auth: bool = Depends(verify_token)):
    """
    Full profiling pipeline:
    1. LLM generates profile (chain-of-thought, grounded)
    2. LLM extracts signals for scoring
    3. Code computes score deterministically
    4. Both are validated for hallucinations
    """
    company_data = request.company.model_dump()
    contacts_data = [c.model_dump() for c in request.contacts]
    trace_id = request.trace_id

    try:
        # Step 1: Generate profile (LLM with grounding)
        profile = await generate_profile(company_data, trace_id)

        # Step 2: Compute score (LLM extracts signals → code computes)
        score = await compute_score(company_data, profile, contacts_data, trace_id)

        # Step 3: Validate score consistency. Issues are logged and reported
        # in the response; they do not fail the request.
        score_validation = validate_score_grounded(score, profile)
        if not score_validation["is_valid"]:
            logger.warning(f"Score validation issues: {score_validation['issues']}")

        return {
            "profile": profile,
            "score": score,
            "validation": {
                "profile_grounded": profile.get("grounding_score", 0),
                "profile_consistent": profile.get("is_consistent", True),
                "score_valid": score_validation["is_valid"],
                "score_issues": score_validation.get("issues", []),
            },
            "meta": {
                "model_used": profile.get("llm_model", "unknown"),
                "is_fallback": profile.get("is_fallback", False),
                "tokens_used": profile.get("tokens_used", 0),
                "trace_id": trace_id,
            },
        }
    except HTTPException:
        # An HTTP error raised deliberately by a pipeline step keeps its own
        # status code instead of being rewritten as a generic 500.
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Profiling failed for %s: %s", company_data.get("name"), e)
        raise HTTPException(status_code=500, detail=str(e)) from e


# ─── Run ──────────────────────────────────────────────────────

if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)