#!/usr/bin/env python3
"""
EdgeHomes Embedding API - Pure FastAPI Implementation
=====================================================
OpenAI-compatible embedding API using EdgeHomes ModernBERT model.
Deployed as a Docker container on HuggingFace Spaces.
"""
import os
import secrets
import time
from typing import List, Union

from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel

print("🚨 STARTING EDGEHOMES EMBEDDING API 🚨")

from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Load model at import time so the container fails fast if the model
# cannot be fetched (Spaces will surface the error in the build logs).
print("Loading EdgeHomes ModernBERT model...")
hf_token = os.getenv('HF_TOKEN')
if not hf_token:
    raise ValueError("HF_TOKEN environment variable is required")

# NOTE(review): printing even a prefix of a secret leaks material into
# logs — consider removing this line in production.
print(f"🔑 Using HF_TOKEN: {hf_token[:10]}...")

try:
    tokenizer = AutoTokenizer.from_pretrained(
        'CalebCampbell/edgehomes-modernbert-v1',
        token=hf_token,
        trust_remote_code=True
    )
    model = AutoModel.from_pretrained(
        'CalebCampbell/edgehomes-modernbert-v1',
        token=hf_token,
        trust_remote_code=True
    )
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Failed to load model: {e}")
    raise


def mean_pooling(model_output, attention_mask):
    """Mean pooling to get sentence embeddings.

    Averages token embeddings over the sequence dimension, weighting each
    position by its attention mask so padding tokens do not contribute.
    """
    token_embeddings = model_output[0]  # first element: last hidden state
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    # clamp guards against division by zero for fully-masked sequences
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
        input_mask_expanded.sum(1), min=1e-9
    )


def encode_texts(texts):
    """Encode texts into L2-normalized sentence embeddings (numpy array)."""
    encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output = model(**encoded_input)
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    # L2-normalize so dot product equals cosine similarity
    sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
    return sentence_embeddings.cpu().numpy()


# Pydantic models (OpenAI-compatible request/response schema)
class EmbeddingRequest(BaseModel):
    input: Union[str, List[str]]
    model: str = "edgehomes-modernbert-v1"
    encoding_format: str = "float"


class EmbeddingData(BaseModel):
    object: str = "embedding"
    embedding: List[float]
    index: int


class Usage(BaseModel):
    prompt_tokens: int
    total_tokens: int


class EmbeddingResponse(BaseModel):
    object: str = "list"
    data: List[EmbeddingData]
    model: str
    usage: Usage


# FastAPI app
app = FastAPI(
    title="EdgeHomes Embedding API",
    description="OpenAI-compatible embedding API using EdgeHomes ModernBERT",
    version="1.0.0",
    root_path="/",  # For reverse proxy
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json"
)

# Add CORS middleware for HuggingFace Spaces
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Security
security = HTTPBearer()


def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Verify bearer token for API access.

    Raises:
        HTTPException 500 if the server has no API token configured.
        HTTPException 401 if the presented token does not match.
    """
    expected_token = os.getenv("EDGEHOMES_API_TOKEN")
    if not expected_token:
        raise HTTPException(
            status_code=500,
            detail="Server configuration error: API token not set"
        )
    # compare_digest: constant-time comparison, prevents timing attacks
    # (plain != short-circuits on the first differing byte)
    if not secrets.compare_digest(credentials.credentials, expected_token):
        raise HTTPException(
            status_code=401,
            detail="Invalid authentication token",
            headers={"WWW-Authenticate": "Bearer"}
        )
    return credentials.credentials


# API Routes
@app.get("/")
async def root():
    """Root endpoint with API information"""
    return {
        "service": "EdgeHomes Embedding API",
        "model": "edgehomes-modernbert-v1",
        "version": "1.0.0",
        "endpoints": {
            "embeddings": "POST /v1/embeddings",
            "models": "GET /v1/models",
            "health": "GET /health",
            "docs": "GET /docs"
        }
    }


@app.get("/health")
async def health_check():
    """Health check endpoint (no authentication required)"""
    return {
        "status": "healthy",
        "model": "EdgeHomes ModernBERT v1",
        "model_loaded": model is not None
    }


@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embeddings(
    request: EmbeddingRequest,
    token: str = Depends(verify_token)
):
    """
    Generate embeddings for input text(s).

    OpenAI-compatible endpoint that accepts single string or array of strings.
    """
    # Handle both string and array inputs
    texts = request.input if isinstance(request.input, list) else [request.input]

    try:
        # Generate embeddings
        embeddings = encode_texts(texts)

        # Convert to OpenAI format
        embedding_data = []
        for i, embedding in enumerate(embeddings):
            embedding_data.append(EmbeddingData(
                embedding=embedding.tolist(),
                index=i
            ))

        # Calculate token usage (simple word count approximation —
        # not the tokenizer's true token count)
        total_tokens = sum(len(text.split()) for text in texts)

        return EmbeddingResponse(
            data=embedding_data,
            model=request.model,
            usage=Usage(
                prompt_tokens=total_tokens,
                total_tokens=total_tokens
            )
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Embedding generation failed: {str(e)}"
        )


@app.get("/v1/models")
async def list_models(token: str = Depends(verify_token)):
    """List available models (OpenAI-compatible)"""
    return {
        "object": "list",
        "data": [
            {
                "id": "edgehomes-modernbert-v1",
                "object": "model",
                "created": int(time.time()),
                "owned_by": "edgehomes",
                "permission": [],
                "root": "edgehomes-modernbert-v1",
                "parent": None
            }
        ]
    }


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)