emilbm committed
Commit 8f842e4 · 0 Parent(s)

Init FastAPI python project
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,51 @@
+name: test-and-deploy
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  build-and-test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          version: "0.9.2"
+
+      - name: Set up Python
+        run: uv python install
+
+      - name: Install dependencies
+        run: uv sync --locked --all-extras --dev
+
+      - name: Run linting
+        run: make lint
+
+      - name: Run tests
+        run: make test
+
+  deploy-to-hf:
+    needs: build-and-test
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          lfs: true
+
+      - name: Deploy to Hugging Face Space
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_USERNAME: ${{ secrets.HF_USERNAME }}
+          HF_SPACE: ${{ secrets.HF_SPACE }}
+        run: |
+          git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$HF_SPACE main
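The deploy job above can be reproduced from a local clone. A hedged sketch of the manual equivalent (the credential values below are placeholders, not from the repository; in CI they come from GitHub secrets):

```shell
# Manual equivalent of the workflow's "Deploy to Hugging Face Space" step.
# Placeholder values; the workflow reads these from GitHub Actions secrets.
HF_USERNAME="your-username"
HF_SPACE="your-space"
HF_TOKEN="hf_xxx"

# The Space is addressed as a plain git remote with token auth embedded in the URL.
PUSH_URL="https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$HF_SPACE"
echo "$PUSH_URL"

# --force is used because the Space's git history can diverge from GitHub's:
# git push --force "$PUSH_URL" main
```

Note that the workflow checks out with `fetch-depth: 0` and `lfs: true` so the full history and any LFS objects are available for the push.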
.gitignore ADDED
@@ -0,0 +1,11 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+.DS_Store
+
+# Virtual environments
+.venv
.python-version ADDED
@@ -0,0 +1 @@
+3.12
Dockerfile ADDED
@@ -0,0 +1,15 @@
+FROM python:3.12-slim
+
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR /app
+
+COPY --chown=user pyproject.toml ./
+RUN pip install --no-cache-dir .
+
+COPY --chown=user app ./app
+
+# Start the app with Uvicorn
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
Makefile ADDED
@@ -0,0 +1,15 @@
+APP_DIR := $(CURDIR)/app
+TESTS_DIR := $(CURDIR)/tests
+
+format:
+	uv run black $(APP_DIR) $(TESTS_DIR)/*.py
+	uv run ruff check $(APP_DIR) $(TESTS_DIR) --fix
+
+lint:
+	uv run black --check $(APP_DIR) $(TESTS_DIR)/*.py
+	uv run ruff check $(APP_DIR) $(TESTS_DIR)
+	uv run mypy $(APP_DIR) $(TESTS_DIR)
+
+test:
+	uv run pytest $(TESTS_DIR)
+
README.md ADDED
@@ -0,0 +1,146 @@
+---
+title: Text Embedding
+emoji: 🚀
+colorFrom: purple
+colorTo: yellow
+sdk: docker
+pinned: false
+license: apache-2.0
+---
+
+# Embedding API
+
+API to call an embedding model ([intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large)) for generating multilingual text embeddings.<br>
+The embedding model takes a text string and converts it into a 1024-dimensional vector.<br>
+Using a `POST` request to the `/embed` endpoint with a list of texts, the API returns their corresponding embeddings.<br>
+A maximum of 2000 characters per text is enforced to avoid truncation by the tokenizer, and thereby loss of information.<br>
+Each text must start with either "query: " or "passage: ".<br>
+
+The API is deployed on a Hugging Face Docker Space, where the Swagger UI can be accessed at:<br>
+[https://emilbm-text-embedding.hf.space/docs](https://emilbm-text-embedding.hf.space/docs)
+
+## Features
+
+- FastAPI-based REST API
+- `/embed` endpoint for generating embeddings from a list of texts
+- `/health` endpoint for checking the API status
+- Uses Hugging Face Transformers and PyTorch
+- Includes linting and unit tests
+- Dockerfile for containerization
+- CI/CD with GitHub Actions to build, lint, test, and deploy to Hugging Face
+
+## Local Development
+
+### Requirements
+
+- Python 3.12+
+- [uv](https://docs.astral.sh/uv/)
+- (Optional) Docker
+
+### Installation
+
+1. **Clone the repository:**
+   ```sh
+   git clone <your-repo-url>
+   cd embedding-api
+   ```
+
+2. **Create a virtual environment and activate it:**
+   ```sh
+   uv venv
+   source .venv/bin/activate
+   ```
+
+3. **Install dependencies:**
+   ```sh
+   uv sync
+   ```
+
+### Formatting, Linting and Unit Tests
+
+- **Formatting (with Black and Ruff) and linting (with Black, Ruff, and MyPy):**
+  ```sh
+  make format
+  make lint
+  ```
+- **Run unit tests:**
+  ```sh
+  make test
+  ```
+
+### Running Locally (without Docker)
+
+Start the API server with Uvicorn:
+
+```sh
+uvicorn app.main:app --reload --port 7860
+```
+
+### Running Locally (with Docker)
+
+Build and start the API server with Docker:
+
+```sh
+docker build -t embedding-api .
+docker run -p 7860:7860 embedding-api
+```
+
+### Test the Endpoint
+
+Test the endpoint either with curl:
+
+```sh
+curl -X 'POST' \
+  'http://127.0.0.1:7860/embed' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{
+  "texts": [
+    "query: what is the capital of France?",
+    "passage: Paris is the capital of France."
+  ]
+}'
+```
+
+Or through the Swagger UI.
+
+## Usage
+
+### Embed Endpoint
+
+- **POST** `/embed`
+- **Request Body:**
+  ```json
+  {
+    "texts": ["query: Hello world", "passage: Hej verden"]
+  }
+  ```
+- **Response:**
+  ```json
+  {
+    "embeddings": [[...], [...]]
+  }
+  ```
+
+### Health Endpoint
+
+- **GET** `/health`
+- **Response:**
+  ```json
+  {
+    "status": "ok"
+  }
+  ```
+
+## Project Structure
+
+```
+app/
+  main.py             # FastAPI app
+  embeddings.py       # Embedding logic
+  models.py           # Request/response models
+  logger.py           # Logging setup
+tests/
+  test_api.py         # API tests
+  test_embeddings.py  # Embedding tests
+```
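The README's curl example translates directly to a Python client. A minimal sketch using only the standard library; the `API_URL`, `build_embed_request`, and `embed` names are illustrative helpers, not part of the repository, and the client-side checks simply mirror the constraints the README documents:

```python
import json
import urllib.request

# Local server from "Running Locally"; swap in the Space URL for the deployment.
API_URL = "http://127.0.0.1:7860/embed"


def build_embed_request(texts: list[str]) -> bytes:
    """Client-side guard mirroring the documented constraints, then JSON-encode."""
    for t in texts:
        if not (t.startswith("query: ") or t.startswith("passage: ")):
            raise ValueError("Each text must start with 'query: ' or 'passage: '.")
        if len(t) > 2000:
            raise ValueError("Each text is limited to 2000 characters.")
    return json.dumps({"texts": texts}).encode("utf-8")


def embed(texts: list[str]) -> list[list[float]]:
    """POST the texts to /embed and return the embedding vectors."""
    req = urllib.request.Request(
        API_URL,
        data=build_embed_request(texts),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())["embeddings"]


# Usage (with the server running):
#   vectors = embed(["query: what is the capital of France?"])
#   each vector in `vectors` has length 1024
```

Failing fast in the client avoids a round trip that the server would reject with a 422 anyway.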
app/__init__.py ADDED
File without changes
app/embeddings.py ADDED
@@ -0,0 +1,34 @@
+from transformers import AutoTokenizer, AutoModel
+from torch import Tensor
+
+model = AutoModel.from_pretrained("intfloat/multilingual-e5-large")
+tokenizer = AutoTokenizer.from_pretrained("intfloat/multilingual-e5-large")
+
+
+def average_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
+    """Average pool the token embeddings."""
+    last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
+    return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
+
+
+def embed_text(texts: list[str]) -> list[list[float]]:
+    """
+    Generate embeddings for a list of texts.
+
+    The model supports a maximum of 512 tokens per input, which typically corresponds to about 2000-2500 characters.
+    To avoid losing important information, we set a limit of 2000 characters per input text.
+    """
+    if not texts:
+        raise ValueError("No input texts provided.")
+    if any(len(text) > 2000 for text in texts):
+        raise ValueError(
+            "One or more input texts exceed the maximum length of 2000 characters."
+        )
+
+    batch_dict = tokenizer(
+        texts, max_length=512, padding=True, truncation=True, return_tensors="pt"
+    )
+    outputs = model(**batch_dict)
+    embeddings = average_pool(outputs.last_hidden_state, batch_dict["attention_mask"])
+
+    return embeddings.detach().cpu().tolist()
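The masked mean that `average_pool` computes on tensors can be illustrated without PyTorch. A plain-Python sketch (`average_pool_lists` is a hypothetical helper, not part of the module): positions whose attention-mask entry is 0 (padding) are dropped, and the remaining token vectors are averaged column-wise:

```python
def average_pool_lists(
    hidden: list[list[list[float]]], mask: list[list[int]]
) -> list[list[float]]:
    """Masked mean over the token axis, mirroring average_pool on nested lists."""
    pooled = []
    for states, m in zip(hidden, mask):
        # Keep only token vectors whose attention-mask entry is 1.
        kept = [vec for vec, keep in zip(states, m) if keep]
        # Column-wise average of the kept vectors.
        pooled.append([sum(col) / len(kept) for col in zip(*kept)])
    return pooled


print(average_pool_lists([[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]], [[1, 1, 0]]))
# [[2.0, 3.0]], the same values as the first row in tests/test_embeddings.py
```

The tensor version achieves the same effect by zeroing masked positions with `masked_fill` and dividing the sum by the per-row mask count, which avoids any Python-level loop.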
app/logger.py ADDED
@@ -0,0 +1,5 @@
+import logging
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+)
app/main.py ADDED
@@ -0,0 +1,28 @@
+from fastapi import FastAPI, HTTPException
+from app.models import EmbedRequest, EmbedResponse
+from app.embeddings import embed_text
+import logging
+
+app = FastAPI(
+    title="Embedding API",
+    description="A simple API to generate text embeddings using Microsoft's `multilingual-e5-large` model.",
+    version="1.0.0",
+)
+logger = logging.getLogger(__name__)
+
+
+@app.post("/embed", response_model=EmbedResponse)
+async def embed(request: EmbedRequest):
+    """Generate embeddings for a list of texts."""
+    try:
+        vectors = embed_text(request.texts)
+        return {"embeddings": vectors}
+    except Exception as e:
+        logger.exception("Error generating embeddings")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    return {"status": "ok"}
app/models.py ADDED
@@ -0,0 +1,42 @@
+from pydantic import BaseModel, Field, field_validator, StringConstraints
+from typing import Annotated
+
+PREFIX_ACCEPTED = ["query: ", "passage: "]
+
+ShortText = Annotated[str, StringConstraints(max_length=2000)]
+
+
+class EmbedRequest(BaseModel):
+    """
+    Request model for texts to be embedded.
+    Each text must be at most 2000 characters long and start with
+    either "query: " or "passage: ".
+    """
+
+    texts: list[ShortText] = Field(
+        ...,
+        json_schema_extra={
+            "example": [
+                "query: what is the capital of France?",
+                "passage: Paris is the capital of France.",
+            ]
+        },
+        description="List of texts to be embedded (≤ 2000 characters each); each must start with 'query: ' or 'passage: '.",
+    )
+
+    @field_validator("texts")
+    @classmethod
+    def check_prefixes(cls, texts: list[str]) -> list[str]:
+        for t in texts:
+            if not any(t.startswith(prefix) for prefix in PREFIX_ACCEPTED):
+                raise ValueError(f"Each text must start with one of {PREFIX_ACCEPTED}")
+        return texts
+
+
+class EmbedResponse(BaseModel):
+    """Response model containing embeddings."""
+
+    embeddings: list[list[float]] = Field(
+        ...,
+        description="List of embedding vectors corresponding to the input texts. Each embedding is a list of floats with length 1024.",
+    )
pyproject.toml ADDED
@@ -0,0 +1,19 @@
+[project]
+name = "embedding-api"
+version = "1.0.0"
+description = "API to call an embedding model"
+readme = "README.md"
+requires-python = ">=3.12"
+
+dependencies = [
+    "black>=25.9.0",
+    "fastapi>=0.119.0",
+    "httpx>=0.28.1",
+    "mypy>=1.18.2",
+    "pydantic>=2.12.0",
+    "pytest>=8.4.2",
+    "ruff>=0.14.0",
+    "torch>=2.8.0",
+    "transformers>=4.57.0",
+    "uvicorn>=0.37.0",
+]
tests/__init__.py ADDED
File without changes
tests/test_api.py ADDED
@@ -0,0 +1,26 @@
+from fastapi.testclient import TestClient
+from app.main import app
+
+client = TestClient(app)
+
+
+def test_embed():
+    """Test the /embed endpoint with valid input."""
+    response = client.post("/embed", json={"texts": ["query: Hello world"]})
+    assert response.status_code == 200  # OK
+    data = response.json()
+    assert "embeddings" in data
+    assert len(data["embeddings"][0]) == 1024
+
+
+def test_embed_no_texts():
+    """Test the /embed endpoint with no texts provided."""
+    response = client.post("/embed", json={})
+    assert response.status_code == 422  # Unprocessable Entity
+
+
+def test_embed_long_text():
+    """Test the /embed endpoint with a text longer than 2000 characters."""
+    long_text = "query: " + "a" * 1994  # 2001 characters
+    response = client.post("/embed", json={"texts": [long_text]})
+    assert response.status_code == 422  # Unprocessable Entity
tests/test_embeddings.py ADDED
@@ -0,0 +1,56 @@
+from app.embeddings import average_pool, embed_text
+import torch
+import pytest
+
+
+def test_average_pool_basic():
+    """Test average pooling produces correct shape and masking."""
+    last_hidden_states = torch.tensor(
+        [
+            [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
+            [[10.0, 20.0], [30.0, 40.0], [50.0, 60.0]],
+        ]
+    )  # shape: (2, 3, 2)
+    attention_mask = torch.tensor(
+        [
+            [1, 1, 0],
+            [1, 0, 0],
+        ]
+    )  # shape: (2, 3)
+
+    result = average_pool(last_hidden_states, attention_mask)
+
+    # Expected averages:
+    # row 1: [(1+3)/2, (2+4)/2] = [2, 3]
+    # row 2: [10, 20]
+    expected = torch.tensor([[2.0, 3.0], [10.0, 20.0]])
+
+    assert torch.allclose(result, expected, atol=1e-6)
+    assert result.shape == (2, 2)
+
+
+def test_embed_text_valid():
+    """Test embedding returns correct number of vectors and dimensions."""
+
+    texts = ["query: Hello world", "query: Hej verden"]
+    embeddings = embed_text(texts)
+
+    # Assertions
+    assert isinstance(embeddings, list)
+    assert len(embeddings) == len(texts)
+    assert all(isinstance(vec, list) for vec in embeddings)
+    assert all(isinstance(x, float) for x in embeddings[0])
+    assert len(embeddings[0]) == 1024
+
+
+def test_embed_text_empty_list():
+    """Should raise ValueError if no input texts."""
+    with pytest.raises(ValueError, match="No input texts provided"):
+        embed_text([])
+
+
+def test_embed_text_too_long():
+    """Should raise ValueError for inputs exceeding 2000 characters."""
+    too_long = ["query: " + "a" * 1994]  # 2001 characters
+    with pytest.raises(ValueError, match="exceed the maximum length"):
+        embed_text(too_long)
uv.lock ADDED
The diff for this file is too large to render.