chmielvu committed on
Commit
253dbcb
·
verified ·
1 Parent(s): d521c64

Add OpenAI embeddings compatibility and Ollama aliases

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-312.pyc +0 -0
  2. app.py +63 -18
__pycache__/app.cpython-312.pyc CHANGED
Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
 
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import time
2
  from typing import Any
3
 
 
4
  import numpy as np
5
  import torch
6
  from fastapi import FastAPI, HTTPException
@@ -14,6 +15,12 @@ torch.set_num_threads(2)
14
  APP_TITLE = "ollama-code-embed"
15
  MODEL_ID = "jinaai/jina-code-embeddings-0.5b"
16
  MODEL_NAME = "code-embed"
 
 
 
 
 
 
17
  MODEL_CREATED_AT = "2026-03-11T00:00:00Z"
18
  MODEL_DIMENSIONS = 896
19
  SERVER_VERSION = "0.11.0"
@@ -55,6 +62,14 @@ class EmbedRequest(CompatibleRequest):
55
  keep_alive: str | int | None = None
56
 
57
 
 
 
 
 
 
 
 
 
58
  def get_model() -> SentenceTransformer:
59
  global _model, _loaded_at_ns, _load_duration_ns
60
  if _model is None:
@@ -78,6 +93,10 @@ def normalize_inputs(request: EmbedRequest) -> list[str]:
78
  raise HTTPException(status_code=400, detail="Request must include 'input' or 'prompt'")
79
 
80
 
 
 
 
 
81
  def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:
82
  if dimensions is None or dimensions <= 0 or dimensions >= vector.shape[0]:
83
  return vector
@@ -88,6 +107,11 @@ def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:
88
  return truncated
89
 
90
 
 
 
 
 
 
91
  def estimate_prompt_eval_count(texts: list[str], model: SentenceTransformer) -> int:
92
  tokenizer = getattr(model, "tokenizer", None)
93
  if tokenizer is None:
@@ -135,7 +159,7 @@ def api_version() -> dict[str, str]:
135
 
136
  @app.get("/api/tags")
137
  def api_tags() -> dict[str, Any]:
138
- return {"models": [model_card(MODEL_NAME), model_card(MODEL_ID)]}
139
 
140
 
141
  @app.get("/api/ps")
@@ -160,8 +184,7 @@ def api_ps() -> dict[str, Any]:
160
 
161
  @app.post("/api/show")
162
  def api_show(request: EmbedRequest) -> dict[str, Any]:
163
- if request.model not in {MODEL_NAME, MODEL_ID}:
164
- raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found")
165
  return {
166
  "license": "cc-by-nc-4.0",
167
  "modelfile": f"FROM {MODEL_ID}",
@@ -182,25 +205,14 @@ def v1_models() -> dict[str, Any]:
182
  return {
183
  "object": "list",
184
  "data": [
185
- {
186
- "id": MODEL_NAME,
187
- "object": "model",
188
- "created": now,
189
- "owned_by": "chmielvu",
190
- },
191
- {
192
- "id": MODEL_ID,
193
- "object": "model",
194
- "created": now,
195
- "owned_by": "chmielvu",
196
- },
197
  ],
198
  }
199
 
200
 
201
  def embed_impl(request: EmbedRequest) -> dict[str, Any]:
202
- if request.model not in {MODEL_NAME, MODEL_ID}:
203
- raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found")
204
 
205
  texts = normalize_inputs(request)
206
  model = get_model()
@@ -209,7 +221,7 @@ def embed_impl(request: EmbedRequest) -> dict[str, Any]:
209
  total_duration = time.perf_counter_ns() - started
210
  payload = [maybe_truncate(vector, request.dimensions).astype(np.float32).tolist() for vector in vectors]
211
  return {
212
- "model": MODEL_NAME,
213
  "embeddings": payload,
214
  "total_duration": total_duration,
215
  "load_duration": _load_duration_ns,
@@ -234,3 +246,36 @@ def api_embeddings(request: EmbedRequest) -> dict[str, Any]:
234
  "load_duration": result["load_duration"],
235
  "prompt_eval_count": result["prompt_eval_count"],
236
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import time
2
  from typing import Any
3
 
4
+ import base64
5
  import numpy as np
6
  import torch
7
  from fastapi import FastAPI, HTTPException
 
# Service identity and model metadata served by the Ollama-compatible API.
APP_TITLE = "ollama-code-embed"
MODEL_ID = "jinaai/jina-code-embeddings-0.5b"
MODEL_NAME = "code-embed"
# Every name a client may use for this model: the short name and the HF id,
# each with and without the Ollama-style ":latest" tag suffix.
MODEL_ALIASES = [
    alias
    for base in (MODEL_NAME, MODEL_ID)
    for alias in (base, f"{base}:latest")
]
MODEL_CREATED_AT = "2026-03-11T00:00:00Z"
MODEL_DIMENSIONS = 896
SERVER_VERSION = "0.11.0"
 
62
  keep_alive: str | int | None = None
63
 
64
 
65
class OpenAIEmbeddingRequest(CompatibleRequest):
    """Request body for the OpenAI-compatible /v1/embeddings endpoint."""

    # Requested model; handlers check it against MODEL_ALIASES via validate_model_name.
    model: str = MODEL_ID
    # One text or a batch of texts to embed.
    input: str | list[str]
    # "float" returns vectors as lists of floats; "base64" returns base64-encoded
    # float32 bytes (see the encoding branch in v1_embeddings).
    encoding_format: str = "float"
    # Optional truncation of each returned vector (applied by maybe_truncate).
    dimensions: int | None = None
    # Accepted for OpenAI API compatibility; not read by any handler visible here.
    user: str | None = None
72
+
73
  def get_model() -> SentenceTransformer:
74
  global _model, _loaded_at_ns, _load_duration_ns
75
  if _model is None:
 
93
  raise HTTPException(status_code=400, detail="Request must include 'input' or 'prompt'")
94
 
95
 
96
def normalize_openai_inputs(request: OpenAIEmbeddingRequest) -> list[str]:
    """Coerce the OpenAI-style `input` field (str or list of str) to a list of texts."""
    raw = request.input
    if isinstance(raw, list):
        return raw
    return [raw]
99
+
100
  def maybe_truncate(vector: np.ndarray, dimensions: int | None) -> np.ndarray:
101
  if dimensions is None or dimensions <= 0 or dimensions >= vector.shape[0]:
102
  return vector
 
107
  return truncated
108
 
109
 
110
def validate_model_name(model_name: str) -> None:
    """Reject requests for unknown models with an HTTP 404.

    Any name in MODEL_ALIASES (short name, HF id, with/without ":latest")
    is accepted; anything else raises HTTPException.
    """
    if model_name in MODEL_ALIASES:
        return
    raise HTTPException(status_code=404, detail=f"Model '{model_name}' not found")
113
+
114
+
115
  def estimate_prompt_eval_count(texts: list[str], model: SentenceTransformer) -> int:
116
  tokenizer = getattr(model, "tokenizer", None)
117
  if tokenizer is None:
 
159
 
160
@app.get("/api/tags")
def api_tags() -> dict[str, Any]:
    """Ollama /api/tags: advertise each alias as an installed model."""
    cards = [model_card(alias) for alias in MODEL_ALIASES]
    return {"models": cards}
163
 
164
 
165
  @app.get("/api/ps")
 
184
 
185
  @app.post("/api/show")
186
  def api_show(request: EmbedRequest) -> dict[str, Any]:
187
+ validate_model_name(request.model)
 
188
  return {
189
  "license": "cc-by-nc-4.0",
190
  "modelfile": f"FROM {MODEL_ID}",
 
205
  return {
206
  "object": "list",
207
  "data": [
208
+ {"id": model_name, "object": "model", "created": now, "owned_by": "chmielvu"}
209
+ for model_name in MODEL_ALIASES
 
 
 
 
 
 
 
 
 
 
210
  ],
211
  }
212
 
213
 
214
  def embed_impl(request: EmbedRequest) -> dict[str, Any]:
215
+ validate_model_name(request.model)
 
216
 
217
  texts = normalize_inputs(request)
218
  model = get_model()
 
221
  total_duration = time.perf_counter_ns() - started
222
  payload = [maybe_truncate(vector, request.dimensions).astype(np.float32).tolist() for vector in vectors]
223
  return {
224
+ "model": request.model,
225
  "embeddings": payload,
226
  "total_duration": total_duration,
227
  "load_duration": _load_duration_ns,
 
246
  "load_duration": result["load_duration"],
247
  "prompt_eval_count": result["prompt_eval_count"],
248
  }
249
+
250
+
251
@app.post("/v1/embeddings")
def v1_embeddings(request: OpenAIEmbeddingRequest) -> dict[str, Any]:
    """OpenAI-compatible embeddings endpoint.

    Validates the model name, encodes the input text(s), optionally truncates
    each vector to `request.dimensions`, and serializes vectors either as
    float lists (default) or base64-encoded float32 bytes when
    `encoding_format == "base64"`.  Returns an OpenAI-style list payload with
    usage counts plus the Ollama-style duration fields.
    """
    validate_model_name(request.model)
    texts = normalize_openai_inputs(request)
    model = get_model()

    started_ns = time.perf_counter_ns()
    vectors = np.asarray(model.encode(texts, convert_to_numpy=True))
    elapsed_ns = time.perf_counter_ns() - started_ns  # encode time only

    items: list[dict[str, Any]] = []
    for position, raw_vector in enumerate(vectors):
        trimmed = maybe_truncate(raw_vector, request.dimensions).astype(np.float32)
        serialized: list[float] | str
        if request.encoding_format == "base64":
            # OpenAI base64 format: raw little-endian float32 bytes, base64-encoded.
            serialized = base64.b64encode(trimmed.tobytes()).decode("ascii")
        else:
            serialized = trimmed.tolist()
        items.append({"object": "embedding", "index": position, "embedding": serialized})

    token_count = estimate_prompt_eval_count(texts, model)
    return {
        "object": "list",
        "model": request.model,
        "data": items,
        "usage": {
            "prompt_tokens": token_count,
            "total_tokens": token_count,
        },
        "load_duration": _load_duration_ns,
        "total_duration": elapsed_ns,
    }