Spaces:

fahmiaziz
/

api-embedding

Running

App Files Community

fahmiaziz98 committed on Nov 5

Commit

2e2b2b3

1 Parent(s): 376886a

[UPDATE]: Input str or List[str]

Browse files

Files changed (2) hide show

src/api/routers/embedding.py +21 -26
src/models/schemas/requests.py +5 -7

src/api/routers/embedding.py CHANGED Viewed

@@ -27,11 +27,8 @@ from src.core.exceptions import (
 from src.api.dependencies import get_model_manager
 from src.utils.validators import (
     extract_embedding_kwargs,
-    validate_texts,
     count_tokens_batch,
 )
-from src.config.settings import get_settings
 router = APIRouter(tags=["embeddings"])
@@ -64,10 +61,8 @@ def _ensure_model_type(config, expected_type: str, model_id: str) -> None:
     summary="Generate single/batch embeddings",
     description="Generate embeddings for multiple texts in a single request",
 )
-async def create_embeddings_document(
-    request: EmbedRequest,
-    manager: ModelManager = Depends(get_model_manager),
-    settings=Depends(get_settings),
 ):
     """
     Generate embeddings for multiple texts.
@@ -79,13 +74,11 @@ async def create_embeddings_document(
     Raises:
         HTTPException: On validation or generation errors
     """
     try:
-        # Validate input
-        validate_texts(
-            request.input,
-            max_length=settings.MAX_TEXT_LENGTH,
-            max_batch_size=settings.MAX_BATCH_SIZE,
-        )
         kwargs = extract_embedding_kwargs(request)
         model = manager.get_model(request.model)
@@ -95,7 +88,7 @@ async def create_embeddings_document(
         start_time = time.time()
-        embeddings = model.embed(input=request.input, **kwargs)
         processing_time = time.time() - start_time
         data = [
@@ -108,8 +101,8 @@ async def create_embeddings_document(
         ]
         token_usage = TokenUsage(
-            prompt_tokens=count_tokens_batch(request.input),
-            total_tokens=count_tokens_batch(request.input),
         )
         response = DenseEmbedResponse(
@@ -120,8 +113,8 @@ async def create_embeddings_document(
         )
         logger.info(
-            f"Generated {len(request.input)} embeddings "
-            f"in {processing_time:.3f}s ({len(request.input) / processing_time:.1f} texts/s)"
         )
         return response
@@ -133,10 +126,10 @@ async def create_embeddings_document(
     except EmbeddingGenerationError as e:
         raise HTTPException(status_code=e.status_code, detail=e.message)
     except Exception as e:
-        logger.exception("Unexpected error in create_embeddings_document")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Failed to create batch embeddings: {str(e)}",
         )
@@ -160,8 +153,10 @@ async def create_sparse_embedding(
     Raises:
         HTTPException: On validation or generation errors
     """
     try:
-        validate_texts(request.input)
         kwargs = extract_embedding_kwargs(request)
         model = manager.get_model(request.model)
@@ -171,12 +166,12 @@ async def create_sparse_embedding(
         start_time = time.time()
-        sparse_results = model.embed(input=request.input, **kwargs)
         processing_time = time.time() - start_time
         sparse_embeddings = [
             SparseEmbedding(
-                text=request.input[idx],
                 indices=sparse_result["indices"],
                 values=sparse_result["values"],
             )
@@ -190,8 +185,8 @@ async def create_sparse_embedding(
         )
         logger.info(
-            f"Generated {len(request.input)} embeddings "
-            f"in {processing_time:.3f}s ({len(request.input) / processing_time:.1f} texts/s)"
         )
         return response
@@ -206,5 +201,5 @@ async def create_sparse_embedding(
         logger.exception("Unexpected error in create_sparse_embedding")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Failed to create query embedding: {str(e)}",
         )

 from src.api.dependencies import get_model_manager
 from src.utils.validators import (
     extract_embedding_kwargs,
     count_tokens_batch,
 )
 router = APIRouter(tags=["embeddings"])
     summary="Generate single/batch embeddings",
     description="Generate embeddings for multiple texts in a single request",
 )
+async def create_embeddings(
+    request: EmbedRequest, manager: ModelManager = Depends(get_model_manager)
 ):
     """
     Generate embeddings for multiple texts.
     Raises:
         HTTPException: On validation or generation errors
     """
+    if isinstance(request.input, str):
+        texts = [request.input]
     try:
         kwargs = extract_embedding_kwargs(request)
         model = manager.get_model(request.model)
         start_time = time.time()
+        embeddings = model.embed(input=texts, **kwargs)
         processing_time = time.time() - start_time
         data = [
         ]
         token_usage = TokenUsage(
+            prompt_tokens=count_tokens_batch(texts),
+            total_tokens=count_tokens_batch(texts),
         )
         response = DenseEmbedResponse(
         )
         logger.info(
+            f"Generated {len(texts)} embeddings "
+            f"in {processing_time:.3f}s ({len(texts) / processing_time:.1f} texts/s)"
         )
         return response
     except EmbeddingGenerationError as e:
         raise HTTPException(status_code=e.status_code, detail=e.message)
     except Exception as e:
+        logger.exception("Unexpected error in create_embeddings")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to create embeddings: {str(e)}",
         )
     Raises:
         HTTPException: On validation or generation errors
     """
+    if isinstance(request.input, str):
+        texts = [request.input]
     try:
         kwargs = extract_embedding_kwargs(request)
         model = manager.get_model(request.model)
         start_time = time.time()
+        sparse_results = model.embed(input=texts, **kwargs)
         processing_time = time.time() - start_time
         sparse_embeddings = [
             SparseEmbedding(
+                text=texts[idx],
                 indices=sparse_result["indices"],
                 values=sparse_result["values"],
             )
         )
         logger.info(
+            f"Generated {len(texts)} embeddings "
+            f"in {processing_time:.3f}s ({len(texts) / processing_time:.1f} texts/s)"
         )
         return response
         logger.exception("Unexpected error in create_sparse_embedding")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to create sparse embedding: {str(e)}",
         )

src/models/schemas/requests.py CHANGED Viewed

@@ -5,7 +5,7 @@ This module defines all Pydantic models for incoming API requests,
 with validation and documentation.
 """
-from typing import List, Optional, Literal
 from pydantic import BaseModel, Field, field_validator, ConfigDict
 from .common import EmbeddingOptions
@@ -55,18 +55,18 @@ class EmbedRequest(BaseEmbedRequest):
     Used for /embeddings and /embed_sparse endpoint to process multiple texts at once.
     Attributes:
-        input: List of input texts to embed
     """
-    input: List[str] = Field(
         ...,
-        description="List of input texts to generate embeddings for",
         min_length=1,
     )
     @field_validator("input")
     @classmethod
-    def validate_texts(cls, v: List[str]) -> List[str]:
         """Validate that all texts are non-empty."""
         if not v:
             raise ValueError("texts list cannot be empty")
@@ -81,8 +81,6 @@ class EmbedRequest(BaseEmbedRequest):
                 raise ValueError(f"texts[{idx}] must be a string")
             if not text.strip():
                 raise ValueError(f"texts[{idx}] cannot be empty or whitespace only")
-            if len(text) > 8192:
-                raise ValueError(f"texts[{idx}] exceeds maximum length (8192)")
             validated.append(text)
         return validated

 with validation and documentation.
 """
+from typing import List, Optional, Literal, Union
 from pydantic import BaseModel, Field, field_validator, ConfigDict
 from .common import EmbeddingOptions
     Used for /embeddings and /embed_sparse endpoint to process multiple texts at once.
     Attributes:
+        input: Str or List of input texts to embed
     """
+    input: Union[str, List[str]] = Field(
         ...,
+        description="Str or List of input texts to generate embeddings for",
         min_length=1,
     )
     @field_validator("input")
     @classmethod
+    def validate_texts(cls, v: Union[str, List[str]]) -> List[str]:
         """Validate that all texts are non-empty."""
         if not v:
             raise ValueError("texts list cannot be empty")
                 raise ValueError(f"texts[{idx}] must be a string")
             if not text.strip():
                 raise ValueError(f"texts[{idx}] cannot be empty or whitespace only")
             validated.append(text)
         return validated