Spaces:

chmielvu
/

ollama-code-embed

Sleeping

App Files Files Community

chmielvu committed 26 days ago

Commit

253dbcb

verified ·

1 Parent(s): d521c64

Add OpenAI embeddings compatibility and Ollama aliases

Browse files

Files changed (2) hide show

__pycache__/app.cpython-312.pyc +0 -0
app.py +63 -18

__pycache__/app.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import time
 from typing import Any
 import numpy as np
 import torch
 from fastapi import FastAPI, HTTPException
@@ -14,6 +15,12 @@ torch.set_num_threads(2)
 APP_TITLE = "ollama-code-embed"
 MODEL_ID = "jinaai/jina-code-embeddings-0.5b"
 MODEL_NAME = "code-embed"
 MODEL_CREATED_AT = "2026-03-11T00:00:00Z"
 MODEL_DIMENSIONS = 896
 SERVER_VERSION = "0.11.0"
@@ -55,6 +62,14 @@ class EmbedRequest(CompatibleRequest):
     keep_alive: str | int | None = None
 def get_model() -> SentenceTransformer:
     global _model, _loaded_at_ns, _load_duration_ns
     if _model is None:
@@ -78,6 +93,10 @@ def normalize_inputs(request: EmbedRequest) -> list[str]:
     raise HTTPException(status_code=400, detail="Request must include 'input' or 'prompt'")
 def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:
     if dimensions is None or dimensions <= 0 or dimensions >= vector.shape[0]:
         return vector
@@ -88,6 +107,11 @@ def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:
     return truncated
 def estimate_prompt_eval_count(texts: list[str], model: SentenceTransformer) -> int:
     tokenizer = getattr(model, "tokenizer", None)
     if tokenizer is None:
@@ -135,7 +159,7 @@ def api_version() -> dict[str, str]:
 @app.get("/api/tags")
 def api_tags() -> dict[str, Any]:
-    return {"models": [model_card(MODEL_NAME), model_card(MODEL_ID)]}
 @app.get("/api/ps")
@@ -160,8 +184,7 @@ def api_ps() -> dict[str, Any]:
 @app.post("/api/show")
 def api_show(request: EmbedRequest) -> dict[str, Any]:
-    if request.model not in {MODEL_NAME, MODEL_ID}:
-        raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found")
     return {
         "license": "cc-by-nc-4.0",
         "modelfile": f"FROM {MODEL_ID}",
@@ -182,25 +205,14 @@ def v1_models() -> dict[str, Any]:
     return {
         "object": "list",
         "data": [
-            {
-                "id": MODEL_NAME,
-                "object": "model",
-                "created": now,
-                "owned_by": "chmielvu",
-            },
-            {
-                "id": MODEL_ID,
-                "object": "model",
-                "created": now,
-                "owned_by": "chmielvu",
-            },
         ],
     }
 def embed_impl(request: EmbedRequest) -> dict[str, Any]:
-    if request.model not in {MODEL_NAME, MODEL_ID}:
-        raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found")
     texts = normalize_inputs(request)
     model = get_model()
@@ -209,7 +221,7 @@ def embed_impl(request: EmbedRequest) -> dict[str, Any]:
     total_duration = time.perf_counter_ns() - started
     payload = [maybe_truncate(vector, request.dimensions).astype(np.float32).tolist() for vector in vectors]
     return {
-        "model": MODEL_NAME,
         "embeddings": payload,
         "total_duration": total_duration,
         "load_duration": _load_duration_ns,
@@ -234,3 +246,36 @@ def api_embeddings(request: EmbedRequest) -> dict[str, Any]:
         "load_duration": result["load_duration"],
         "prompt_eval_count": result["prompt_eval_count"],
     }

 import time
 from typing import Any
+import base64
 import numpy as np
 import torch
 from fastapi import FastAPI, HTTPException
 APP_TITLE = "ollama-code-embed"
 MODEL_ID = "jinaai/jina-code-embeddings-0.5b"
 MODEL_NAME = "code-embed"
+# Every model name this server accepts: the short name and the full model id,
+# each with and without a ":latest" suffix. validate_model_name() checks
+# membership against this list.
+MODEL_ALIASES = [
+    MODEL_NAME,
+    f"{MODEL_NAME}:latest",
+    MODEL_ID,
+    f"{MODEL_ID}:latest",
+]
 MODEL_CREATED_AT = "2026-03-11T00:00:00Z"
 MODEL_DIMENSIONS = 896
 SERVER_VERSION = "0.11.0"
     keep_alive: str | int | None = None
+# Request body accepted by the POST /v1/embeddings handler (v1_embeddings).
+class OpenAIEmbeddingRequest(CompatibleRequest):
+    # Requested model name; falls back to MODEL_ID when the client omits it.
+    model: str = MODEL_ID
+    # Either a single text or a list of texts to embed (required).
+    input: str | list[str]
+    # "base64" makes the handler return base64 strings; the default "float"
+    # returns plain lists of floats.
+    encoding_format: str = "float"
+    # Optional output length, forwarded to maybe_truncate() by the handler.
+    dimensions: int | None = None
+    # Accepted in the request body but not read by v1_embeddings.
+    user: str | None = None
 def get_model() -> SentenceTransformer:
     global _model, _loaded_at_ns, _load_duration_ns
     if _model is None:
     raise HTTPException(status_code=400, detail="Request must include 'input' or 'prompt'")
+def normalize_openai_inputs(request: OpenAIEmbeddingRequest) -> list[str]:
+    """Return the request's ``input`` as a list, wrapping a lone value in one."""
+    raw = request.input
+    if isinstance(raw, list):
+        return raw
+    return [raw]
 def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:
     if dimensions is None or dimensions <= 0 or dimensions >= vector.shape[0]:
         return vector
     return truncated
+def validate_model_name(model_name: str) -> None:
+    """Raise an HTTP 404 error unless *model_name* is listed in MODEL_ALIASES."""
+    if model_name in MODEL_ALIASES:
+        return
+    raise HTTPException(status_code=404, detail=f"Model '{model_name}' not found")
 def estimate_prompt_eval_count(texts: list[str], model: SentenceTransformer) -> int:
     tokenizer = getattr(model, "tokenizer", None)
     if tokenizer is None:
 @app.get("/api/tags")
 def api_tags() -> dict[str, Any]:
+    return {"models": [model_card(name) for name in MODEL_ALIASES]}
 @app.get("/api/ps")
 @app.post("/api/show")
 def api_show(request: EmbedRequest) -> dict[str, Any]:
+    validate_model_name(request.model)
     return {
         "license": "cc-by-nc-4.0",
         "modelfile": f"FROM {MODEL_ID}",
     return {
         "object": "list",
         "data": [
+            {"id": model_name, "object": "model", "created": now, "owned_by": "chmielvu"}
+            for model_name in MODEL_ALIASES
         ],
     }
 def embed_impl(request: EmbedRequest) -> dict[str, Any]:
+    validate_model_name(request.model)
     texts = normalize_inputs(request)
     model = get_model()
     total_duration = time.perf_counter_ns() - started
     payload = [maybe_truncate(vector, request.dimensions).astype(np.float32).tolist() for vector in vectors]
     return {
+        "model": request.model,
         "embeddings": payload,
         "total_duration": total_duration,
         "load_duration": _load_duration_ns,
         "load_duration": result["load_duration"],
         "prompt_eval_count": result["prompt_eval_count"],
     }
+@app.post("/v1/embeddings")
+def v1_embeddings(request: OpenAIEmbeddingRequest) -> dict[str, Any]:
+    """Embed the request's input text(s) and return an OpenAI-style list payload.
+
+    Args:
+        request: Parsed request body. ``model`` must be a known alias,
+            ``input`` is one text or a list of texts, ``encoding_format`` is
+            ``"float"`` or ``"base64"``, and ``dimensions`` is forwarded to
+            ``maybe_truncate``.
+
+    Returns:
+        A dict with ``object``, ``model`` (echoing the requested name),
+        ``data`` (one ``{"object", "index", "embedding"}`` entry per input),
+        ``usage`` (``prompt_tokens`` and ``total_tokens``, both taken from
+        ``estimate_prompt_eval_count``), plus ``load_duration`` and
+        ``total_duration`` in nanoseconds.
+
+    Raises:
+        HTTPException: 404 from ``validate_model_name`` for an unknown model;
+            400 when ``encoding_format`` is neither ``"float"`` nor ``"base64"``.
+    """
+    validate_model_name(request.model)
+    # Reject unknown formats up front instead of silently answering with
+    # floats, and before any model loading or encoding work is done.
+    if request.encoding_format not in ("float", "base64"):
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                f"Unsupported encoding_format '{request.encoding_format}'; "
+                "expected 'float' or 'base64'"
+            ),
+        )
+    as_base64 = request.encoding_format == "base64"
+    texts = normalize_openai_inputs(request)
+    model = get_model()
+    # Only the encode call is timed; validation and model loading are excluded.
+    started = time.perf_counter_ns()
+    vectors = np.asarray(model.encode(texts, convert_to_numpy=True))
+    total_duration = time.perf_counter_ns() - started
+    data = []
+    for idx, vector in enumerate(vectors):
+        # Pin the byte order so the base64 payload does not depend on the
+        # host's native endianness; "<f4" is little-endian 32-bit float.
+        vector = maybe_truncate(vector, request.dimensions).astype("<f4")
+        embedding: list[float] | str
+        if as_base64:
+            embedding = base64.b64encode(vector.tobytes()).decode("ascii")
+        else:
+            embedding = vector.tolist()
+        data.append({"object": "embedding", "index": idx, "embedding": embedding})
+    prompt_tokens = estimate_prompt_eval_count(texts, model)
+    return {
+        "object": "list",
+        "model": request.model,
+        "data": data,
+        "usage": {
+            "prompt_tokens": prompt_tokens,
+            "total_tokens": prompt_tokens,
+        },
+        "load_duration": _load_duration_ns,
+        "total_duration": total_duration,
+    }