Spaces:

mycompanyajt
/

inference

Sleeping

App Files Files Community

nurulajt committed on 25 days ago

Commit

cc89204

verified ·

1 Parent(s): b810e9b

Update api.py

Browse files

Files changed (1) hide show

api.py +169 -19

api.py CHANGED Viewed

@@ -1,12 +1,14 @@
 """
 Embedding Inference API
-Supports JobBERT v2, Jina AI, and Voyage AI embeddings
 """
-from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-from typing import List, Optional
 from sentence_transformers import SentenceTransformer
 import os
 import logging
@@ -30,8 +32,18 @@ app.add_middleware(
 MODELS = {}
 VOYAGE_API_KEY = os.environ.get('VOYAGE_API_KEY', '')
 voyage_client = None
 if VOYAGE_API_KEY:
     try:
         import voyageai
@@ -62,11 +74,52 @@ def load_models():
         logger.error(f"Error loading models: {e}")
         raise
 @app.on_event("startup")
 async def startup_event():
     load_models()
-class EmbeddingRequest(BaseModel):
     texts: List[str] = Field(..., description="List of texts to embed", min_items=1)
     model: str = Field(..., description="Model to use: 'jobbertv2', 'jobbertv3', 'jina', or 'voyage'")
     task: Optional[str] = Field(None, description="Task type for Jina AI: 'retrieval.query', 'retrieval.passage', 'text-matching', etc.")
@@ -81,7 +134,7 @@ class EmbeddingRequest(BaseModel):
             }
         }
-class EmbeddingResponse(BaseModel):
     embeddings: List[List[float]] = Field(..., description="List of embedding vectors")
     model: str = Field(..., description="Model used")
     dimension: int = Field(..., description="Embedding dimension")
@@ -91,6 +144,7 @@ class HealthResponse(BaseModel):
     status: str
     models_loaded: List[str]
     voyage_available: bool
 @app.get("/", response_model=dict)
 async def root():
@@ -100,25 +154,121 @@ async def root():
         "version": "1.0.0",
         "endpoints": {
             "/health": "Health check and available models",
-            "/embed": "Generate embeddings (POST)",
             "/docs": "API documentation"
         }
     }
 @app.get("/health", response_model=HealthResponse)
 async def health():
-    """Health check endpoint"""
     models_loaded = list(MODELS.keys())
     return {
         "status": "healthy",
         "models_loaded": models_loaded,
-        "voyage_available": voyage_client is not None
     }
-@app.post("/embed", response_model=EmbeddingResponse)
-async def create_embeddings(request: EmbeddingRequest):
     """
-    Generate embeddings for input texts
     **Models:**
     - `jobbertv2`: JobBERT-v2 (768-dim, job-specific)
@@ -147,16 +297,16 @@ async def create_embeddings(request: EmbeddingRequest):
             )
         try:
-            input_type = request.input_type or "document"
             result = voyage_client.embed(
                 texts=request.texts,
                 model="voyage-3",
-                input_type=input_type
             )
             embeddings = result.embeddings
             dimension = len(embeddings[0]) if embeddings else 0
-            return EmbeddingResponse(
                 embeddings=embeddings,
                 model="voyage-3",
                 dimension=dimension,
@@ -167,16 +317,16 @@ async def create_embeddings(request: EmbeddingRequest):
     elif model_name in MODELS:
         try:
-            model = MODELS[model_name]
             if model_name == "jina" and request.task:
-                embeddings = model.encode(
                     request.texts,
                     task=request.task,
                     convert_to_numpy=True
                 )
             else:
-                embeddings = model.encode(
                     request.texts,
                     convert_to_numpy=True
                 )
@@ -184,7 +334,7 @@ async def create_embeddings(request: EmbeddingRequest):
             embeddings_list = embeddings.tolist()
             dimension = len(embeddings_list[0]) if embeddings_list else 0
-            return EmbeddingResponse(
                 embeddings=embeddings_list,
                 model=model_name,
                 dimension=dimension,
@@ -200,7 +350,7 @@ async def create_embeddings(request: EmbeddingRequest):
         )
 @app.get("/models")
-async def list_models():
     """List available models and their specifications"""
     models_info = {
         "jobbertv2": {

 """
 Embedding Inference API
+Supports JobBERT v2/v3, Jina AI, and Voyage AI embeddings
+Compatible with Elasticsearch inference endpoint format
 """
+from fastapi import FastAPI, HTTPException, Query, Security, Depends
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
+from typing import List, Optional, Union
 from sentence_transformers import SentenceTransformer
 import os
 import logging
 MODELS = {}
 VOYAGE_API_KEY = os.environ.get('VOYAGE_API_KEY', '')
+API_KEY = os.environ.get('API_KEY', '')
+REQUIRE_API_KEY = os.environ.get('REQUIRE_API_KEY', 'false').lower() == 'true'
+security = HTTPBearer(auto_error=False)
 voyage_client = None
+logger.info(f"API Key authentication: {'ENABLED' if REQUIRE_API_KEY else 'DISABLED'}")
+if API_KEY:
+    logger.info(f"✓ API Key configured (length: {len(API_KEY)})")
+else:
+    logger.info("ℹ️  No API Key set")
 if VOYAGE_API_KEY:
     try:
         import voyageai
         logger.error(f"Error loading models: {e}")
         raise
+async def verify_api_key(credentials: Optional[HTTPAuthorizationCredentials] = Security(security)):
+    """Verify API key from Authorization header"""
+    if not REQUIRE_API_KEY:
+        return True
+    if not API_KEY:
+        raise HTTPException(
+            status_code=500,
+            detail="API key authentication is enabled but no API key is configured on the server"
+        )
+    if credentials is None:
+        raise HTTPException(
+            status_code=401,
+            detail="Missing authentication credentials. Use: Authorization: Bearer YOUR_API_KEY"
+        )
+    if credentials.credentials != API_KEY:
+        raise HTTPException(
+            status_code=403,
+            detail="Invalid API key"
+        )
+    return True
 @app.on_event("startup")
 async def startup_event():
+    """Run load_models() when the application's startup event fires."""
     load_models()
+class ElasticsearchInferenceRequest(BaseModel):
+    """Request body for POST /embed: a single text or a list of texts under "input"."""
+    # The field is named "input" (shadowing the builtin inside the class body)
+    # because that is the key the request payload uses.
+    input: Union[str, List[str]] = Field(..., description="Text or list of texts to embed")
+    class Config:
+        # Example payload for the generated schema / API docs.
+        schema_extra = {
+            "example": {
+                "input": "Software Engineer"
+            }
+        }
+class ElasticsearchInferenceResponse(BaseModel):
+    """Response of POST /embed when the request "input" was a single string."""
+    embedding: List[float] = Field(..., description="Embedding vector for single input")
+class ElasticsearchInferenceBatchResponse(BaseModel):
+    """Response of POST /embed when the request "input" was a list of strings."""
+    embeddings: List[List[float]] = Field(..., description="List of embedding vectors for batch input")
+class BatchEmbeddingRequest(BaseModel):
     texts: List[str] = Field(..., description="List of texts to embed", min_items=1)
     model: str = Field(..., description="Model to use: 'jobbertv2', 'jobbertv3', 'jina', or 'voyage'")
     task: Optional[str] = Field(None, description="Task type for Jina AI: 'retrieval.query', 'retrieval.passage', 'text-matching', etc.")
             }
         }
+class BatchEmbeddingResponse(BaseModel):
     embeddings: List[List[float]] = Field(..., description="List of embedding vectors")
     model: str = Field(..., description="Model used")
     dimension: int = Field(..., description="Embedding dimension")
     status: str
     models_loaded: List[str]
     voyage_available: bool
+    api_key_required: bool
 @app.get("/", response_model=dict)
 async def root():
         "version": "1.0.0",
         "endpoints": {
             "/health": "Health check and available models",
+            "/embed": "Generate embeddings - Elasticsearch compatible (POST)",
+            "/embed/batch": "Generate batch embeddings (POST)",
+            "/models": "List available models",
             "/docs": "API documentation"
         }
     }
 @app.get("/health", response_model=HealthResponse)
 async def health():
+    """Health check endpoint (no authentication required)"""
     models_loaded = list(MODELS.keys())
     return {
         "status": "healthy",
         "models_loaded": models_loaded,
+        "voyage_available": voyage_client is not None,
+        "api_key_required": REQUIRE_API_KEY
     }
+@app.post("/embed", response_model=Union[ElasticsearchInferenceResponse, ElasticsearchInferenceBatchResponse])
+async def create_embeddings_elasticsearch(
+    request: ElasticsearchInferenceRequest,
+    model: str = Query("jobbertv3", description="Model: jobbertv2, jobbertv3, jina, or voyage"),
+    task: Optional[str] = Query(None, description="Task for Jina AI: retrieval.query, retrieval.passage, text-matching, etc."),
+    input_type: Optional[str] = Query(None, description="Input type for Voyage AI: document or query"),
+    authenticated: bool = Depends(verify_api_key)
+):
+    """
+    Generate embeddings - Elasticsearch inference endpoint compatible format
+    **Usage:**
+    - Single text: `POST /embed?model=jobbertv3` with body `{"input": "Software Engineer"}`
+    - Multiple texts: `POST /embed?model=jina` with body `{"input": ["text1", "text2"]}`
+    **Models (via query parameter):**
+    - `jobbertv2`: JobBERT-v2 (768-dim, job-specific)
+    - `jobbertv3`: JobBERT-v3 (768-dim, job-specific, improved performance) - default
+    - `jina`: Jina AI embeddings-v3 (1024-dim, general purpose)
+    - `voyage`: Voyage AI (1024-dim, requires API key)
+    **Jina AI Tasks (via query parameter):**
+    - `retrieval.query`: For search queries
+    - `retrieval.passage`: For documents/passages
+    - `text-matching`: For similarity matching (default)
+    **Voyage AI Input Types (via query parameter):**
+    - `document`: For documents/passages
+    - `query`: For search queries
+    """
+    model_name = model.lower()
+    # Handle single string or list of strings
+    is_single = isinstance(request.input, str)
+    texts = [request.input] if is_single else request.input
+    if model_name == "voyage":
+        if not voyage_client:
+            raise HTTPException(
+                status_code=503,
+                detail="Voyage AI not available. Set VOYAGE_API_KEY environment variable."
+            )
+        try:
+            voyage_input_type = input_type or "document"
+            result = voyage_client.embed(
+                texts=texts,
+                model="voyage-3",
+                input_type=voyage_input_type
+            )
+            embeddings = result.embeddings
+            if is_single:
+                return ElasticsearchInferenceResponse(embedding=embeddings[0])
+            else:
+                return ElasticsearchInferenceBatchResponse(embeddings=embeddings)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Voyage AI error: {str(e)}")
+    elif model_name in MODELS:
+        try:
+            selected_model = MODELS[model_name]
+            if model_name == "jina" and task:
+                embeddings = selected_model.encode(
+                    texts,
+                    task=task,
+                    convert_to_numpy=True
+                )
+            else:
+                embeddings = selected_model.encode(
+                    texts,
+                    convert_to_numpy=True
+                )
+            embeddings_list = embeddings.tolist()
+            if is_single:
+                return ElasticsearchInferenceResponse(embedding=embeddings_list[0])
+            else:
+                return ElasticsearchInferenceBatchResponse(embeddings=embeddings_list)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Model error: {str(e)}")
+    else:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid model '{model_name}'. Choose from: jobbertv2, jobbertv3, jina, voyage"
+        )
+@app.post("/embed/batch", response_model=BatchEmbeddingResponse)
+async def create_embeddings_batch(
+    request: BatchEmbeddingRequest,
+    authenticated: bool = Depends(verify_api_key)
+):
     """
+    Generate embeddings for multiple texts - Original batch format
     **Models:**
     - `jobbertv2`: JobBERT-v2 (768-dim, job-specific)
             )
         try:
+            voyage_input_type = request.input_type or "document"
             result = voyage_client.embed(
                 texts=request.texts,
                 model="voyage-3",
+                input_type=voyage_input_type
             )
             embeddings = result.embeddings
             dimension = len(embeddings[0]) if embeddings else 0
+            return BatchEmbeddingResponse(
                 embeddings=embeddings,
                 model="voyage-3",
                 dimension=dimension,
     elif model_name in MODELS:
         try:
+            selected_model = MODELS[model_name]
             if model_name == "jina" and request.task:
+                embeddings = selected_model.encode(
                     request.texts,
                     task=request.task,
                     convert_to_numpy=True
                 )
             else:
+                embeddings = selected_model.encode(
                     request.texts,
                     convert_to_numpy=True
                 )
             embeddings_list = embeddings.tolist()
             dimension = len(embeddings_list[0]) if embeddings_list else 0
+            return BatchEmbeddingResponse(
                 embeddings=embeddings_list,
                 model=model_name,
                 dimension=dimension,
         )
 @app.get("/models")
+async def list_models(authenticated: bool = Depends(verify_api_key)):
     """List available models and their specifications"""
     models_info = {
         "jobbertv2": {