Soumik Bose commited on
Commit
967868b
·
0 Parent(s):

first commit

Browse files
.gitignore ADDED
File without changes
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use the official Python 3.11 slim image
FROM python:3.11-slim

# Install curl for the keep-alive script (and clean apt lists to keep the image small)
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*

# Set the working directory inside the container
WORKDIR /app

# Environment variables for optimization and logging
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
ENV HF_HOME=/tmp/cache

# Copy the requirements file first so the dependency layer is cached
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . .

# Create cache directory (world-writable: HF Spaces runs as an arbitrary uid)
RUN mkdir -p ${HF_HOME} && chmod 777 ${HF_HOME}

# Expose port 7860 (required by Hugging Face Spaces)
EXPOSE 7860

# Keep-alive loop + start Uvicorn with optimized workers.
# FIX: the original used `curl ... && sleep 300`, so any failed curl skipped the
# sleep and the loop busy-spun, hammering the endpoint. Always sleep between pings.
CMD bash -c "while true; do curl -s https://sasasas635-database-chat.hf.space/ping >/dev/null; sleep 300; done & uvicorn main:app --host 0.0.0.0 --port 7860 --workers 4 --loop asyncio"
README.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
title: My Embeddings API
emoji: 🤩
colorFrom: orange
colorTo: blue
sdk: docker
app_file: main.py
pinned: false
__pycache__/model_service.cpython-311.pyc ADDED
Binary file (2.29 kB). View file
 
download_setup.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from sentence_transformers import SentenceTransformer

# Configuration
MODEL_NAME = 'BAAI/bge-base-en-v1.5'  # The 768-dimension model
SAVE_PATH = './models/bge-base-en-v1.5'


def download_model():
    """Fetch the embedding model from the Hub and persist it under SAVE_PATH."""
    print(f"Downloading model: {MODEL_NAME}...")

    # Pull the model (downloads on first use, then loads it into memory)
    embedder = SentenceTransformer(MODEL_NAME)

    # Persist to the local folder, creating it if necessary
    os.makedirs(SAVE_PATH, exist_ok=True)
    print(f"Saving model to: {SAVE_PATH}...")
    embedder.save(SAVE_PATH)

    print("✅ Model downloaded and saved successfully.")

    # Report the on-disk size of the main weights file, if present
    weights_file = os.path.join(SAVE_PATH, 'model.safetensors')
    if os.path.exists(weights_file):
        size_mb = os.path.getsize(weights_file) / (1024 * 1024)
        print(f"Model file size: {size_mb:.2f} MB")

    print(f"Model dimension: {embedder.get_sentence_embedding_dimension()}")


if __name__ == "__main__":
    download_model()
main.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Standard library
import asyncio
import logging
import multiprocessing
import os
import secrets
from concurrent.futures import ThreadPoolExecutor
from typing import List, Union, Optional

# Third-party
from fastapi import FastAPI, HTTPException, Security, Depends, Header
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, Field

# Local
from model_service import LocalEmbeddingService
# ----------------------------------------------------------------------------
# Logging
# ----------------------------------------------------------------------------
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()],
)
logger = logging.getLogger(__name__)

# ----------------------------------------------------------------------------
# Configuration (all overridable via environment variables)
# ----------------------------------------------------------------------------
LOCAL_MODEL_PATH = os.getenv('MODEL_PATH', './models/bge-base-en-v1.5')
AUTH_TOKEN = os.getenv('AUTH_TOKEN', None)  # Bearer token; auth disabled when unset
ALLOWED_ORIGINS = os.getenv('ALLOWED_ORIGINS', '*').split(',')

# Size the thread pool from the machine: 2x cores suits I/O-bound workloads.
CPU_COUNT = multiprocessing.cpu_count()
MAX_WORKERS = CPU_COUNT * 2
logger.info(f"Detected {CPU_COUNT} CPU cores. Using {MAX_WORKERS} max workers for thread pool.")

# ----------------------------------------------------------------------------
# FastAPI application
# ----------------------------------------------------------------------------
app = FastAPI(
    title="BGE Embedding API",
    description="Production-grade embedding inference API using BAAI/bge-base-en-v1.5",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# ----------------------------------------------------------------------------
# CORS
# ----------------------------------------------------------------------------
app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
logger.info(f"CORS enabled for origins: {ALLOWED_ORIGINS}")
59
+
60
+ # ============================================================================
61
+ # SECURITY
62
+ # ============================================================================
63
+ security = HTTPBearer(auto_error=False)
64
+
65
+ async def verify_token(credentials: Optional[HTTPAuthorizationCredentials] = Security(security)):
66
+ """Verify Bearer token if AUTH_TOKEN is set."""
67
+ if AUTH_TOKEN is None:
68
+ # No authentication required
69
+ return True
70
+
71
+ if credentials is None:
72
+ logger.warning("Authentication required but no token provided")
73
+ raise HTTPException(
74
+ status_code=401,
75
+ detail="Authentication required",
76
+ headers={"WWW-Authenticate": "Bearer"},
77
+ )
78
+
79
+ if credentials.credentials != AUTH_TOKEN:
80
+ logger.warning(f"Invalid token attempt: {credentials.credentials[:10]}...")
81
+ raise HTTPException(
82
+ status_code=401,
83
+ detail="Invalid authentication token",
84
+ headers={"WWW-Authenticate": "Bearer"},
85
+ )
86
+
87
+ return True
# ----------------------------------------------------------------------------
# Global state (populated by the startup event)
# ----------------------------------------------------------------------------
service = None   # LocalEmbeddingService once the model is loaded
executor = None  # ThreadPoolExecutor used to keep the event loop unblocked


@app.on_event("startup")
async def startup_event():
    """Initialize the thread pool and load the embedding model."""
    global service, executor

    try:
        banner = "=" * 60
        logger.info(banner)
        logger.info("Starting BGE Embedding Service")
        logger.info(banner)

        # Thread pool so blocking embedding work never stalls the event loop
        executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
        logger.info(f"Thread pool executor initialized with {MAX_WORKERS} workers")

        # Load the model from local disk
        logger.info(f"Loading model from: {LOCAL_MODEL_PATH}")
        service = LocalEmbeddingService(LOCAL_MODEL_PATH)
        logger.info(f"✅ Model loaded successfully! Dimension: {service.embedding_dim}")

        # Announce whether requests will require a Bearer token
        if AUTH_TOKEN:
            logger.info("🔒 Authentication enabled (Bearer token required)")
        else:
            logger.warning("⚠️ Authentication disabled (no AUTH_TOKEN set)")

        logger.info(banner)
        logger.info("Service ready to accept requests")
        logger.info(banner)

    except Exception as e:
        logger.error(f"❌ Failed to initialize service: {e}", exc_info=True)
        raise


@app.on_event("shutdown")
async def shutdown_event():
    """Tear down the thread pool on shutdown."""
    global executor
    logger.info("Shutting down service...")

    if executor:
        executor.shutdown(wait=True)
        logger.info("Thread pool executor shut down")

    logger.info("Service shutdown complete")
# ----------------------------------------------------------------------------
# Request/response models
# ----------------------------------------------------------------------------
class EmbedRequest(BaseModel):
    """Payload for /embed and /embeddings."""

    text: Union[str, List[str]] = Field(
        ...,
        description="Single text string or list of texts to embed"
    )

    # FIX: the project pins pydantic 2.x (requirements: pydantic==2.10.3), where
    # `class Config: schema_extra` is silently ignored. Pydantic v2 reads the
    # example from model_config["json_schema_extra"].
    model_config = {
        "json_schema_extra": {
            "example": {
                "text": "Ginger was also a smart giraffe. She knew what was wrong."
            }
        }
    }


class EmbedResponse(BaseModel):
    """Embedding result returned to the caller."""

    embeddings: Union[List[float], List[List[float]]] = Field(
        ...,
        description="Generated embedding(s)"
    )
    dimension: int = Field(..., description="Embedding dimension")
    count: int = Field(..., description="Number of texts processed")
# ----------------------------------------------------------------------------
# Read-only endpoints
# ----------------------------------------------------------------------------

@app.get("/")
async def root():
    """Top-level service metadata and endpoint directory."""
    return {
        "message": "BGE Embedding API - Production Ready",
        "model": "BAAI/bge-base-en-v1.5",
        "dimension": 768,
        "version": "2.0.0",
        "authentication": "enabled" if AUTH_TOKEN else "disabled",
        "endpoints": {
            "health": "/health",
            "ping": "/ping",
            "embed": "/embed",
            "embeddings": "/embeddings",
            "docs": "/docs",
        },
    }


@app.get("/health")
async def health_check():
    """Report readiness; responds 503 until the model has been loaded."""
    if service is None:
        logger.error("Health check failed: service not initialized")
        raise HTTPException(status_code=503, detail="Service not initialized")

    return {
        "status": "healthy",
        "model_dimension": service.embedding_dim,
        "model_path": LOCAL_MODEL_PATH,
        "max_workers": MAX_WORKERS,
        "cpu_count": CPU_COUNT,
    }


@app.get("/ping")
async def ping():
    """Cheap liveness probe targeted by the Dockerfile keep-alive loop."""
    return {"status": "ok", "message": "pong"}
@app.post("/embed", response_model=EmbedResponse)
async def create_embeddings(
    request: EmbedRequest,
    authenticated: bool = Depends(verify_token)
):
    """
    Generate embeddings for the provided text(s) - Non-blocking operation.

    - **text**: Single string or list of strings to embed

    Returns normalized 768-dimensional embeddings suitable for cosine similarity.

    Requires Bearer token authentication if AUTH_TOKEN is set.
    """
    if service is None:
        logger.error("Embedding request failed: service not initialized")
        raise HTTPException(status_code=503, detail="Service not initialized")

    try:
        # Determine input type and count
        is_single = isinstance(request.text, str)
        count = 1 if is_single else len(request.text)

        logger.info(f"Processing embedding request for {count} text(s)")

        # Off-load the CPU-heavy encode to the thread pool so the event loop
        # stays responsive. FIX: asyncio.get_event_loop() is deprecated inside
        # coroutines (Python 3.10+); get_running_loop() is the correct call and
        # always returns the loop this coroutine runs on.
        loop = asyncio.get_running_loop()
        embeddings = await loop.run_in_executor(
            executor,
            service.generate_embedding,
            request.text
        )

        logger.info(f"✅ Successfully generated {count} embedding(s)")

        return EmbedResponse(
            embeddings=embeddings,
            dimension=service.embedding_dim,
            count=count
        )

    except Exception as e:
        logger.error(f"❌ Embedding generation failed: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Embedding generation failed: {str(e)}"
        )
@app.post("/embeddings", response_model=EmbedResponse)
async def create_embeddings_batch(
    request: EmbedRequest,
    authenticated: bool = Depends(verify_token)
):
    """
    Alias for /embed endpoint - Non-blocking batch embedding generation.

    Requires Bearer token authentication if AUTH_TOKEN is set.
    """
    # Delegate straight to the primary handler so the two routes can never drift.
    return await create_embeddings(request, authenticated)
model_service.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from typing import List, Union
from sentence_transformers import SentenceTransformer

class LocalEmbeddingService:
    """Service for generating embeddings using a locally stored model."""

    def __init__(self, model_folder: str):
        """
        Initialize the service by loading the model from a local path.

        Args:
            model_folder: Path to the folder containing the saved model

        Raises:
            FileNotFoundError: If the model folder does not exist yet.
        """
        if not os.path.exists(model_folder):
            # FIX: the setup script in this repo is download_setup.py — the old
            # message pointed users at download_model.py, which does not exist.
            raise FileNotFoundError(
                f"Model folder not found at: {model_folder}. "
                "Please run download_setup.py first."
            )

        print(f"Loading model from {model_folder}...")
        self.model = SentenceTransformer(model_folder)
        self.embedding_dim = self.model.get_sentence_embedding_dimension()
        print(f"✅ Model loaded successfully. Dimension: {self.embedding_dim}")

    def generate_embedding(self, text: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
        """
        Generate embeddings for the given text(s).

        Args:
            text: A single string or list of strings to embed

        Returns:
            A single embedding (list of floats) or list of embeddings
        """
        # Normalized vectors let callers use a plain dot product as cosine similarity
        embeddings = self.model.encode(
            text,
            normalize_embeddings=True,
            convert_to_tensor=False
        )

        # FIX: the original had an isinstance(text, str) branch whose both arms
        # returned the identical expression; .tolist() already yields a flat
        # list for a single input and a nested list for a batch.
        return embeddings.tolist()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Core dependencies
fastapi==0.115.5
uvicorn[standard]==0.32.1
pydantic==2.10.3

# ML dependencies
sentence-transformers==3.3.1
torch==2.5.1
numpy==1.26.4

# Production dependencies
python-multipart==0.0.20
aiofiles==24.1.0
test_local.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import time
from model_service import LocalEmbeddingService

# Configuration: path where download_setup.py saves the model
LOCAL_MODEL_PATH = './models/bge-base-en-v1.5'

def test_single_text():
    """Test embedding generation for a single text."""
    service = LocalEmbeddingService(LOCAL_MODEL_PATH)

    text = "Ginger was also a smart giraffe. She knew what was wrong."

    print(f"\n{'='*60}")
    print("Testing single text embedding")
    print(f"{'='*60}")
    print(f"Text: '{text}'")

    start_time = time.time()
    vector = service.generate_embedding(text)
    end_time = time.time()

    print(f"\n✅ Embedding generated in {end_time - start_time:.4f} seconds")
    print(f"Dimensions: {len(vector)}")
    print(f"First 10 values: {vector[:10]}")
    # Embeddings are normalized by the service, so the L2 norm should be ~1.0
    print(f"Vector norm (should be ~1.0): {sum(x**2 for x in vector)**0.5:.4f}")

def test_batch_texts():
    """Test embedding generation for multiple texts."""
    service = LocalEmbeddingService(LOCAL_MODEL_PATH)

    texts = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is transforming technology.",
        "Embeddings capture semantic meaning of text."
    ]

    print(f"\n{'='*60}")
    print("Testing batch text embeddings")
    print(f"{'='*60}")
    print(f"Number of texts: {len(texts)}")

    start_time = time.time()
    vectors = service.generate_embedding(texts)
    end_time = time.time()

    print(f"\n✅ {len(vectors)} embeddings generated in {end_time - start_time:.4f} seconds")
    print(f"Average time per text: {(end_time - start_time) / len(texts):.4f} seconds")
    print(f"Each embedding dimension: {len(vectors[0])}")

    # Show first embedding sample
    print(f"\nFirst embedding (first 10 values): {vectors[0][:10]}")

def test_similarity():
    """Test cosine similarity between embeddings."""
    service = LocalEmbeddingService(LOCAL_MODEL_PATH)

    texts = [
        "The cat sits on the mat.",
        "A feline rests on the rug.",  # Similar meaning
        "Python is a programming language."  # Different meaning
    ]

    print(f"\n{'='*60}")
    print("Testing semantic similarity")
    print(f"{'='*60}")

    vectors = service.generate_embedding(texts)

    # Dot product == cosine similarity because the vectors are already normalized
    def cosine_sim(v1, v2):
        return sum(a * b for a, b in zip(v1, v2))

    sim_01 = cosine_sim(vectors[0], vectors[1])
    sim_02 = cosine_sim(vectors[0], vectors[2])

    print(f"\nText 1: '{texts[0]}'")
    print(f"Text 2: '{texts[1]}'")
    print(f"Similarity: {sim_01:.4f} (similar meaning)")

    print(f"\nText 1: '{texts[0]}'")
    print(f"Text 3: '{texts[2]}'")
    print(f"Similarity: {sim_02:.4f} (different meaning)")

    print("\n✅ As expected, similar texts have higher similarity!")

def main():
    """Run all tests."""
    try:
        test_single_text()
        test_batch_texts()
        test_similarity()

        print(f"\n{'='*60}")
        print("✅ All tests completed successfully!")
        print(f"{'='*60}\n")

    except FileNotFoundError:
        # FIX: point at the script that actually exists in this repo
        # (download_setup.py), not the nonexistent download_model.py.
        print("\n❌ Model not found. Please run download_setup.py first.")
    except Exception as e:
        print(f"\n❌ An error occurred: {e}")

if __name__ == "__main__":
    main()