# rag/models/embedder.py from typing import List import numpy as np import onnxruntime as ort from fastapi import Request def _l2_normalize(vec: np.ndarray) -> List[float]: norm = np.linalg.norm(vec) or 1.0 return (vec / norm).tolist() def get_embedding(request: Request, text: str) -> List[float]: """ request.app.state.embedder_sess : ONNX Runtime InferenceSession request.app.state.embedder_tokenizer : 토크나이저 """ tokenizer = request.app.state.embedder_tokenizer sess: ort.InferenceSession = request.app.state.embedder_sess inputs = tokenizer(text, return_tensors="np", padding=True, truncation=True, max_length=256) ort_inputs = {k: v for k, v in inputs.items()} ort_outs = sess.run(None, ort_inputs) # 일반적으로 첫 번째 출력이 [batch, dim] 임베딩 vec = ort_outs[0][0] return _l2_normalize(vec)