Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| EdgeHomes Embedding API - Pure FastAPI Implementation | |
| ===================================================== | |
| OpenAI-compatible embedding API using EdgeHomes ModernBERT model. | |
| Deployed as a Docker container on HuggingFace Spaces. | |
| """ | |
import os
import secrets
import time
from typing import List, Union

from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel

print("๐จ STARTING EDGEHOMES EMBEDDING API ๐จ")

# transformers/torch are imported after the banner so the log shows the
# service started before these (slow) imports begin.
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
# --- Model loading (runs once at import time) ----------------------------
# Endpoints reuse the module-level `tokenizer` and `model` objects.
MODEL_ID = "CalebCampbell/edgehomes-modernbert-v1"

print("Loading EdgeHomes ModernBERT model...")
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable is required")
# SECURITY: never log any part of the token -- even a 10-char prefix in the
# Space logs is a credential leak. Log only that a token is present.
print("HF_TOKEN found; authenticating to the Hugging Face Hub")
try:
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=hf_token,
        trust_remote_code=True,
    )
    model = AutoModel.from_pretrained(
        MODEL_ID,
        token=hf_token,
        trust_remote_code=True,
    )
    model.eval()  # inference only; make eval mode explicit
    print("Model loaded successfully!")
except Exception as e:
    # Surface the failure in the Space logs, then abort startup.
    print(f"Failed to load model: {e}")
    raise
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, ignoring padding.

    Args:
        model_output: Model output whose index 0 holds the token embeddings,
            shape (batch, seq_len, hidden).
        attention_mask: (batch, seq_len) mask; 1 for real tokens, 0 for pad.

    Returns:
        Tensor of shape (batch, hidden): masked mean per sequence.
    """
    hidden_states = model_output[0]
    # Broadcast the mask across the hidden dimension so padded positions
    # contribute nothing to the sum.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    summed = (hidden_states * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against all-pad rows
    return summed / counts
def encode_texts(texts):
    """Embed a list of strings as L2-normalized sentence vectors.

    Args:
        texts: list of input strings.

    Returns:
        numpy array of shape (len(texts), hidden); rows have unit L2 norm.
    """
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    # Inference only -- no gradient bookkeeping needed.
    with torch.no_grad():
        outputs = model(**batch)
    pooled = mean_pooling(outputs, batch["attention_mask"])
    unit_vectors = F.normalize(pooled, p=2, dim=1)
    return unit_vectors.cpu().numpy()
| # Pydantic models | |
class EmbeddingRequest(BaseModel):
    """Request body for POST /v1/embeddings (OpenAI-compatible)."""

    # A single string or a batch of strings to embed.
    input: Union[str, List[str]]
    # Model name echoed back in the response; only one model is served.
    model: str = "edgehomes-modernbert-v1"
    # Accepted for OpenAI compatibility; only "float" output is produced.
    encoding_format: str = "float"
class EmbeddingData(BaseModel):
    """One embedding vector in the OpenAI response format."""

    object: str = "embedding"
    # The embedding vector as a plain list of floats.
    embedding: List[float]
    # Position of the corresponding input in the request batch.
    index: int
class Usage(BaseModel):
    """Token accounting; approximated by a whitespace word count upstream."""

    prompt_tokens: int
    total_tokens: int
class EmbeddingResponse(BaseModel):
    """Top-level OpenAI-compatible embeddings response."""

    object: str = "list"
    data: List[EmbeddingData]
    model: str
    usage: Usage
# --- FastAPI application and middleware ----------------------------------
app = FastAPI(
    title="EdgeHomes Embedding API",
    description="OpenAI-compatible embedding API using EdgeHomes ModernBERT",
    version="1.0.0",
    root_path="/",  # For reverse proxy
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)

# CORS for HuggingFace Spaces. The API authenticates with a Bearer header,
# not cookies, so browser "credentials" are not needed. allow_credentials=True
# combined with allow_origins=["*"] would make Starlette echo back any Origin
# header, effectively granting every site credentialed access -- keep it off.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Security ------------------------------------------------------------
# Bearer-token scheme shared by the authenticated routes.
security = HTTPBearer()


def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)) -> str:
    """FastAPI dependency: validate the `Authorization: Bearer` token.

    Returns:
        The validated token string.

    Raises:
        HTTPException: 500 if the server has no token configured,
            401 if the presented token does not match.
    """
    expected_token = os.getenv("EDGEHOMES_API_TOKEN")
    if not expected_token:
        raise HTTPException(
            status_code=500,
            detail="Server configuration error: API token not set",
        )
    # Constant-time comparison; a plain `!=` short-circuits on the first
    # differing byte and can leak the token through response timing.
    if not secrets.compare_digest(credentials.credentials, expected_token):
        raise HTTPException(
            status_code=401,
            detail="Invalid authentication token",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return credentials.credentials
# --- API Routes ----------------------------------------------------------
@app.get("/")
async def root():
    """Root endpoint with API information (no authentication required)."""
    # NOTE(review): the handler had no route decorator in the file as seen,
    # so it was never registered with the app; restored from the docstrings.
    return {
        "service": "EdgeHomes Embedding API",
        "model": "edgehomes-modernbert-v1",
        "version": "1.0.0",
        "endpoints": {
            "embeddings": "POST /v1/embeddings",
            "models": "GET /v1/models",
            "health": "GET /health",
            "docs": "GET /docs",
        },
    }
@app.get("/health")
async def health_check():
    """Health check endpoint (no authentication required)."""
    # Restored the missing route decorator -- without it the endpoint is
    # never registered and /health returns 404.
    return {
        "status": "healthy",
        "model": "EdgeHomes ModernBERT v1",
        # `model` is the module-level transformers model loaded at startup.
        "model_loaded": model is not None,
    }
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embeddings(
    request: EmbeddingRequest,
    token: str = Depends(verify_token),
):
    """Generate embeddings for input text(s).

    OpenAI-compatible endpoint that accepts a single string or an array of
    strings and returns L2-normalized embedding vectors.

    Raises:
        HTTPException: 400 for an empty input array, 500 if embedding fails.
    """
    # Normalize so single-string and batch requests share one code path.
    texts = request.input if isinstance(request.input, list) else [request.input]
    # Reject an empty batch up front; letting it reach the tokenizer would
    # surface as an opaque 500 instead of a client error.
    if not texts:
        raise HTTPException(status_code=400, detail="Input must not be empty")
    try:
        embeddings = encode_texts(texts)
        # Convert to the OpenAI response shape, preserving input order.
        embedding_data = [
            EmbeddingData(embedding=vector.tolist(), index=i)
            for i, vector in enumerate(embeddings)
        ]
        # Token usage is approximated with a whitespace word count.
        total_tokens = sum(len(text.split()) for text in texts)
        return EmbeddingResponse(
            data=embedding_data,
            model=request.model,
            usage=Usage(
                prompt_tokens=total_tokens,
                total_tokens=total_tokens,
            ),
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Embedding generation failed: {str(e)}",
        )
@app.get("/v1/models")
async def list_models(token: str = Depends(verify_token)):
    """List available models (OpenAI-compatible).

    Requires a valid Bearer token; restored the missing route decorator so
    the endpoint is actually registered at GET /v1/models.
    """
    return {
        "object": "list",
        "data": [
            {
                "id": "edgehomes-modernbert-v1",
                "object": "model",
                "created": int(time.time()),
                "owned_by": "edgehomes",
                "permission": [],
                "root": "edgehomes-modernbert-v1",
                "parent": None,
            }
        ],
    }
# Local/container entry point. Port 7860 is the port HuggingFace Spaces
# expects a Docker Space to listen on; 0.0.0.0 so the container is reachable.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)