# Commit note: Add CORS middleware and proxy configuration
# (The lines above originally contained stray web-viewer text —
# "CalebCampbell's picture / 1909900 verified" — which is not Python.)
#!/usr/bin/env python3
"""
EdgeHomes Embedding API - Pure FastAPI Implementation
=====================================================
OpenAI-compatible embedding API using EdgeHomes ModernBERT model.
Deployed as a Docker container on HuggingFace Spaces.
"""
import os
import secrets
import time
from typing import List, Union

from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel
print("๐Ÿšจ STARTING EDGEHOMES EMBEDDING API ๐Ÿšจ")
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
# ---------------------------------------------------------------------------
# Model loading — module-level side effect: runs exactly once at import time
# (i.e. at container startup), before the app can serve any request.
# ---------------------------------------------------------------------------
print("Loading EdgeHomes ModernBERT model...")
hf_token = os.getenv('HF_TOKEN')
if not hf_token:
    raise ValueError("HF_TOKEN environment variable is required")
# SECURITY: never log any portion of the secret token (the original printed
# its first 10 characters). Confirm only that it is present.
print("๐Ÿ”‘ HF_TOKEN found in environment")
try:
    tokenizer = AutoTokenizer.from_pretrained(
        'CalebCampbell/edgehomes-modernbert-v1',
        token=hf_token,
        trust_remote_code=True
    )
    model = AutoModel.from_pretrained(
        'CalebCampbell/edgehomes-modernbert-v1',
        token=hf_token,
        trust_remote_code=True
    )
    print("โœ… Model loaded successfully!")
except Exception as e:
    # Log and re-raise so the container fails fast instead of serving a
    # half-initialized API.
    print(f"โŒ Failed to load model: {e}")
    raise
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the non-padding positions.

    Args:
        model_output: model forward output; index 0 holds the per-token
            hidden states, assumed shape (batch, seq_len, hidden).
        attention_mask: (batch, seq_len) tensor with 1s on real tokens
            and 0s on padding.

    Returns:
        (batch, hidden) tensor of mean-pooled sentence embeddings.
    """
    token_embeddings = model_output[0]
    # Broadcast the mask over the hidden dimension so padded positions
    # contribute nothing to the sum.
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp avoids division by zero for an all-padding row.
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / counts
def encode_texts(texts):
    """Embed a batch of strings using the module-level model/tokenizer.

    Args:
        texts: list of strings to embed.

    Returns:
        numpy array of shape (len(texts), hidden) with L2-normalized rows.
    """
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**batch)
    pooled = mean_pooling(outputs, batch['attention_mask'])
    # Unit-normalize so dot product equals cosine similarity.
    normalized = F.normalize(pooled, p=2, dim=1)
    return normalized.cpu().numpy()
# Pydantic models
class EmbeddingRequest(BaseModel):
    # OpenAI-compatible /v1/embeddings request body.
    input: Union[str, List[str]]  # one text or a batch of texts to embed
    model: str = "edgehomes-modernbert-v1"  # accepted for compatibility; only this model is served
    encoding_format: str = "float"  # accepted for compatibility; output is always float lists
class EmbeddingData(BaseModel):
    # One embedding vector in the OpenAI response format.
    object: str = "embedding"  # fixed discriminator per OpenAI schema
    embedding: List[float]  # the L2-normalized embedding vector
    index: int  # position of the source text in the request batch
class Usage(BaseModel):
    # Token accounting block of the OpenAI response (approximate here —
    # computed by word count, not by the model tokenizer).
    prompt_tokens: int
    total_tokens: int
class EmbeddingResponse(BaseModel):
    # Top-level OpenAI-compatible embeddings response.
    object: str = "list"  # fixed discriminator per OpenAI schema
    data: List[EmbeddingData]  # one entry per input text, in request order
    model: str  # echoes the model name from the request
    usage: Usage  # approximate token usage
# FastAPI app
app = FastAPI(
    title="EdgeHomes Embedding API",
    description="OpenAI-compatible embedding API using EdgeHomes ModernBERT",
    version="1.0.0",
    root_path="/",  # NOTE(review): Starlette's default is "" — confirm the reverse proxy actually needs "/"
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json"
)

# CORS for browser clients on HuggingFace Spaces.
# The CORS spec forbids credentialed responses with a wildcard origin:
# browsers reject "Access-Control-Allow-Origin: *" when credentials are
# enabled, so the original allow_credentials=True could never work with
# allow_origins=["*"]. This API authenticates via the Authorization header,
# which is not a CORS "credential" (cookies/TLS client certs are), so
# disabling credentials loses nothing.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Bearer-token security scheme used by the protected endpoints below.
security = HTTPBearer()
def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Validate the request's bearer token against EDGEHOMES_API_TOKEN.

    Args:
        credentials: parsed Authorization header supplied by FastAPI.

    Returns:
        The presented token string on success.

    Raises:
        HTTPException(500): EDGEHOMES_API_TOKEN is not configured.
        HTTPException(401): presented token does not match.
    """
    expected_token = os.getenv("EDGEHOMES_API_TOKEN")
    if not expected_token:
        raise HTTPException(
            status_code=500,
            detail="Server configuration error: API token not set"
        )
    # Constant-time comparison: a plain != short-circuits on the first
    # differing byte and leaks token bytes through response timing.
    if not secrets.compare_digest(credentials.credentials, expected_token):
        raise HTTPException(
            status_code=401,
            detail="Invalid authentication token",
            headers={"WWW-Authenticate": "Bearer"}
        )
    return credentials.credentials
# API Routes
@app.get("/")
async def root():
    """Root endpoint with API information"""
    # Unauthenticated discovery endpoint: advertises the service name,
    # served model, and the routes exposed below.
    return {
        "service": "EdgeHomes Embedding API",
        "model": "edgehomes-modernbert-v1",
        "version": "1.0.0",
        "endpoints": {
            "embeddings": "POST /v1/embeddings",
            "models": "GET /v1/models",
            "health": "GET /health",
            "docs": "GET /docs"
        }
    }
@app.get("/health")
async def health_check():
    """Liveness probe, intentionally unauthenticated so orchestrators can poll it."""
    loaded = model is not None
    return {
        "status": "healthy",
        "model": "EdgeHomes ModernBERT v1",
        "model_loaded": loaded,
    }
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embeddings(
    request: EmbeddingRequest,
    token: str = Depends(verify_token)
):
    """
    Generate embeddings for input text(s).
    OpenAI-compatible endpoint that accepts single string or array of strings.

    Raises:
        HTTPException(500): embedding generation failed.
    """
    # Normalize to a list so a bare string and a batch share one code path.
    texts = request.input if isinstance(request.input, list) else [request.input]
    # Guard: an empty batch would crash the tokenizer; return an empty
    # (but well-formed) response instead of a 500.
    if not texts:
        return EmbeddingResponse(
            data=[],
            model=request.model,
            usage=Usage(prompt_tokens=0, total_tokens=0)
        )
    try:
        # Generate embeddings and wrap each vector in OpenAI format,
        # preserving request order via the index field.
        embeddings = encode_texts(texts)
        embedding_data = [
            EmbeddingData(embedding=vector.tolist(), index=i)
            for i, vector in enumerate(embeddings)
        ]
        # Token usage is approximated by whitespace word count — it will
        # differ from the model tokenizer's true token count.
        total_tokens = sum(len(text.split()) for text in texts)
        return EmbeddingResponse(
            data=embedding_data,
            model=request.model,
            usage=Usage(
                prompt_tokens=total_tokens,
                total_tokens=total_tokens
            )
        )
    except Exception as e:
        # Chain the cause so server logs keep the original traceback.
        raise HTTPException(
            status_code=500,
            detail=f"Embedding generation failed: {str(e)}"
        ) from e
@app.get("/v1/models")
async def list_models(token: str = Depends(verify_token)):
    """List available models (OpenAI-compatible)"""
    # Single-model service: the catalog always contains exactly one entry.
    model_entry = {
        "id": "edgehomes-modernbert-v1",
        "object": "model",
        "created": int(time.time()),
        "owned_by": "edgehomes",
        "permission": [],
        "root": "edgehomes-modernbert-v1",
        "parent": None,
    }
    return {"object": "list", "data": [model_entry]}
if __name__ == "__main__":
    # Direct-run entry point. Port 7860 is the HuggingFace Spaces
    # convention; binding 0.0.0.0 lets the Space proxy reach the container.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)