Spaces:
Running
Running
Create kami.py
Browse files
kami.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# main.py
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
import time
|
| 5 |
+
import uuid
|
| 6 |
+
from functools import lru_cache
|
| 7 |
+
from typing import List
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
import onnxruntime as ort
|
| 11 |
+
from fastapi import FastAPI, HTTPException
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
from transformers import AutoTokenizer
|
| 14 |
+
|
| 15 |
+
# ------------------------------------------------------------------
|
| 16 |
+
# 1. FastAPI App
|
| 17 |
+
# ------------------------------------------------------------------
|
| 18 |
+
# FastAPI application object.  The interactive docs/redoc routes are
# disabled on purpose: this service is machine-facing only, and skipping
# them trims startup memory and per-request overhead.
app = FastAPI(
    title="Arabic-ONNX-Embedding",
    version="1.0.0",
    docs_url=None,  # disable docs to save memory & latency
    redoc_url=None,
)
|
| 24 |
+
|
| 25 |
+
# ------------------------------------------------------------------
|
| 26 |
+
# 2. ONNX Runtime โ CPU-optimised session
|
| 27 |
+
# ------------------------------------------------------------------
|
| 28 |
+
# Path to the quantised (int8) multilingual-e5-small ONNX export.
MODEL_PATH = "lib/intfloat_multilingual-e5-small_merged_int8.onnx"

sess_opts = ort.SessionOptions()
# Enable all graph-level optimisations (constant folding, node fusion, ...).
sess_opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
# Use every available core for intra-op parallelism (falls back to 1).
sess_opts.intra_op_num_threads = os.cpu_count() or 1
sess_opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
# Flush denormal floats to zero to avoid slow denormal arithmetic on CPU.
sess_opts.add_session_config_entry("session.set_denormal_as_zero", "1")

providers = ["CPUExecutionProvider"]
# Loaded once at import time; the session is shared by every request.
# NOTE(review): InferenceSession is safe for concurrent Run() calls, so no
# lock is needed around session.run — confirm against onnxruntime docs for
# the pinned version.
session = ort.InferenceSession(
    MODEL_PATH, providers=providers, sess_options=sess_opts
)
|
| 40 |
+
|
| 41 |
+
# ------------------------------------------------------------------
|
| 42 |
+
# 3. Tokenizer โ load once
|
| 43 |
+
# ------------------------------------------------------------------
|
| 44 |
+
# Tokenizer is loaded once from the local ./lib directory; local_files_only
# guarantees no network access at startup.
tokenizer = AutoTokenizer.from_pretrained(
    "./lib", local_files_only=True, use_fast=True
)
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------------
|
| 49 |
+
# 4. Normalisation โ fast & cached
|
| 50 |
+
# ------------------------------------------------------------------
|
| 51 |
+
@lru_cache(maxsize=20_000)
|
| 52 |
+
def _normalize(text: str) -> str:
|
| 53 |
+
text = re.sub(r"[ูููููููู]", "", text)
|
| 54 |
+
text = re.sub(r"[ุฅุฃุข]", "ุง", text)
|
| 55 |
+
text = re.sub(r"ู", "ู", text)
|
| 56 |
+
text = re.sub(r"ุค", "ู", text)
|
| 57 |
+
text = re.sub(r"ุฆ", "ู", text)
|
| 58 |
+
text = re.sub(r"ุฉ\b", "ู", text)
|
| 59 |
+
text = re.sub(r"[^\w\s]", " ", text)
|
| 60 |
+
text = re.sub(r"\s+", " ", text)
|
| 61 |
+
return text.strip()
|
| 62 |
+
|
| 63 |
+
# ------------------------------------------------------------------
|
| 64 |
+
# 5. Core embedding โ no async, no locks, pure CPU
|
| 65 |
+
# ------------------------------------------------------------------
|
| 66 |
+
def text_to_embedding(text: str) -> List[float]:
    """Embed one query string into an L2-normalised float32 vector.

    Raises:
        ValueError: if *text* is empty or whitespace-only.
    """
    if not text or not text.strip():
        raise ValueError("Empty text")

    # e5-family models expect the "query: " instruction prefix.
    query = "query: " + _normalize(text.strip())

    encoded = tokenizer(
        query,
        return_tensors="np",
        truncation=True,
        padding=False,  # single query โ no padding
        max_length=128,
        return_attention_mask=True,
        return_token_type_ids=False,
    )

    # Second model output, first batch row: the pooled sentence embedding.
    # NOTE(review): original comment claimed shape (768,), but e5-small is
    # typically 384-dim — confirm against the exported model's outputs.
    embedding = session.run(None, dict(encoded))[1][0]

    magnitude = np.linalg.norm(embedding)
    if magnitude > 0:
        embedding = embedding / magnitude
    return embedding.astype(np.float32).tolist()
|
| 87 |
+
|
| 88 |
+
# ------------------------------------------------------------------
|
| 89 |
+
# 6. Warm-up on startup
|
| 90 |
+
# ------------------------------------------------------------------
|
| 91 |
+
# Run one embedding at startup so the first real request does not pay the
# model/tokenizer cold-start cost.
# NOTE(review): @app.on_event is deprecated in newer FastAPI versions in
# favour of lifespan handlers; it still works — confirm the pinned version.
@app.on_event("startup")
def _warm():
    text_to_embedding("ูุฑุญุจุง")
|
| 94 |
+
|
| 95 |
+
# ------------------------------------------------------------------
|
| 96 |
+
# 7. Pydantic models
|
| 97 |
+
# ------------------------------------------------------------------
|
| 98 |
+
class QueryIn(BaseModel):
    # Request body: the raw query text.  Length bounds (1-256) are enforced
    # by pydantic before the endpoint body runs.
    q: str = Field(..., min_length=1, max_length=256)
|
| 100 |
+
|
| 101 |
+
class EmbeddingOut(BaseModel):
    # Response body: the L2-normalised embedding as a plain list of floats.
    embedding: List[float]
|
| 103 |
+
|
| 104 |
+
# ------------------------------------------------------------------
|
| 105 |
+
# 8. Endpoint โ minimal, sync, no extra middleware
|
| 106 |
+
# ------------------------------------------------------------------
|
| 107 |
+
@app.post("/query", response_model=EmbeddingOut)
def query_endpoint(item: QueryIn):
    """Embed a single query string.

    Returns the normalised embedding, or HTTP 400 when the input is
    unusable (e.g. whitespace-only text).
    """
    try:
        emb = text_to_embedding(item.q)
    except ValueError:
        # Only client-side problems map to 400.  The original caught bare
        # Exception, which mislabelled every internal failure (ONNX or
        # tokenizer error) as "Bad input"; those should surface as 500s.
        raise HTTPException(status_code=400, detail="Bad input")
    return EmbeddingOut(embedding=emb)
|
| 114 |
+
|
| 115 |
+
# ------------------------------------------------------------------
|
| 116 |
+
# 9. Health-check (optional but lightweight)
|
| 117 |
+
# ------------------------------------------------------------------
|
| 118 |
+
@app.get("/health")
def health():
    """Liveness probe: constant-time, no model access."""
    payload = {"status": "ok"}
    return payload
|