fahmiaziz98 committed on
Commit
36e672d
·
1 Parent(s): f435ac4

[UPDATE] Response query

Browse files
src/api/routers/embedding.py CHANGED
@@ -229,43 +229,54 @@ async def create_query_embedding(
229
  )
230
  processing_time = time.time() - start_time
231
 
232
- sparse_result = sparse_results[0]
233
- sparse_embedding = SparseEmbedding(
234
- text=request.texts[0],
235
- indices=sparse_result["indices"],
236
- values=sparse_result["values"],
237
- )
 
 
 
 
238
 
239
  response = SparseEmbedResponse(
240
- sparse_embedding=sparse_embedding,
 
241
  model_id=request.model_id,
242
  processing_time=processing_time,
243
  )
244
  else:
245
- # Dense embedding
246
- embeddings = model.embed_query(
247
  texts=request.texts, prompt=request.prompt, **kwargs
248
  )
249
  processing_time = time.time() - start_time
250
 
251
  response = DenseEmbedResponse(
252
  embeddings=embeddings,
253
- dimension=len(embeddings[0]),
 
254
  model_id=request.model_id,
255
  processing_time=processing_time,
256
- count=len(embeddings),
257
  )
258
 
259
- # Cache the result
260
- if cache is not None:
 
261
  cache.set(
262
- texts=request.texts,
263
  model_id=request.model_id,
264
  result=response,
265
  prompt=request.prompt,
266
- **cache_key_kwargs,
267
  )
268
 
 
 
 
 
 
269
  return response
270
 
271
  except (ValidationError, ModelNotFoundError) as e:
 
229
  )
230
  processing_time = time.time() - start_time
231
 
232
+ # Convert to SparseEmbedding objects
233
+ sparse_embeddings = []
234
+ for idx, sparse_result in enumerate(sparse_results):
235
+ sparse_embeddings.append(
236
+ SparseEmbedding(
237
+ text=request.texts[idx],
238
+ indices=sparse_result["indices"],
239
+ values=sparse_result["values"],
240
+ )
241
+ )
242
 
243
  response = SparseEmbedResponse(
244
+ embeddings=sparse_embeddings,
245
+ count=len(sparse_embeddings),
246
  model_id=request.model_id,
247
  processing_time=processing_time,
248
  )
249
  else:
250
+ # Dense batch embeddings
251
+ embeddings = model.embed_documents(
252
  texts=request.texts, prompt=request.prompt, **kwargs
253
  )
254
  processing_time = time.time() - start_time
255
 
256
  response = DenseEmbedResponse(
257
  embeddings=embeddings,
258
+ dimension=len(embeddings[0]) if embeddings else 0,
259
+ count=len(embeddings),
260
  model_id=request.model_id,
261
  processing_time=processing_time,
 
262
  )
263
 
264
+ # Cache small batches
265
+ if cache is not None and len(request.texts) <= 10:
266
+ cache_key = str(sorted(request.texts))
267
  cache.set(
268
+ texts=cache_key,
269
  model_id=request.model_id,
270
  result=response,
271
  prompt=request.prompt,
272
+ **kwargs,
273
  )
274
 
275
+ logger.info(
276
+ f"Generated {len(request.texts)} embeddings "
277
+ f"in {processing_time:.3f}s ({len(request.texts) / processing_time:.1f} texts/s)"
278
+ )
279
+
280
  return response
281
 
282
  except (ValidationError, ModelNotFoundError) as e:
src/config/settings.py CHANGED
@@ -36,12 +36,12 @@ class Settings(BaseSettings):
36
  PRELOAD_MODELS: bool = True # Load all models at startup
37
 
38
  # Request Limits
39
- MAX_TEXT_LENGTH: int = 8192 # Maximum characters per text
40
  MAX_BATCH_SIZE: int = 100 # Maximum texts per batch request
41
  REQUEST_TIMEOUT: int = 30 # Request timeout in seconds
42
 
43
  # Cache Configuration
44
- ENABLE_CACHE: bool = False # Enable response caching (Phase 2)
45
  CACHE_TTL: int = 3600 # Cache time-to-live in seconds
46
  CACHE_MAX_SIZE: int = 1000 # Maximum cache entries
47
 
 
36
  PRELOAD_MODELS: bool = True # Load all models at startup
37
 
38
  # Request Limits
39
+ MAX_TEXT_LENGTH: int = 32000 # Maximum characters per text
40
  MAX_BATCH_SIZE: int = 100 # Maximum texts per batch request
41
  REQUEST_TIMEOUT: int = 30 # Request timeout in seconds
42
 
43
  # Cache Configuration
44
+ ENABLE_CACHE: bool = True # Enable response caching (Phase 2)
45
  CACHE_TTL: int = 3600 # Cache time-to-live in seconds
46
  CACHE_MAX_SIZE: int = 1000 # Maximum cache entries
47