Initial codes commit
Browse files- modules/embedder.py +9 -3
- requirements.txt +2 -2
modules/embedder.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import List
|
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
from config import EMBED_MODEL, HF_TOKEN
|
| 6 |
|
|
|
|
| 7 |
_client = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN)
|
| 8 |
|
| 9 |
def _l2_normalize(vec: List[float]) -> List[float]:
|
|
@@ -11,7 +12,12 @@ def _l2_normalize(vec: List[float]) -> List[float]:
|
|
| 11 |
return [x / norm for x in vec]
|
| 12 |
|
| 13 |
def get_embedding(text: str) -> List[float]:
|
| 14 |
-
# embeddings
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
return _l2_normalize(vec)
|
|
|
|
|
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
from config import EMBED_MODEL, HF_TOKEN
|
| 6 |
|
| 7 |
+
# 모델과 토큰 지정
|
| 8 |
_client = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN)
|
| 9 |
|
| 10 |
def _l2_normalize(vec: List[float]) -> List[float]:
|
|
|
|
| 12 |
return [x / norm for x in vec]
|
| 13 |
|
| 14 |
def get_embedding(text: str) -> List[float]:
|
| 15 |
+
# embeddings 태스크를 직접 지정
|
| 16 |
+
response = _client.post(
|
| 17 |
+
json={"inputs": text},
|
| 18 |
+
task="embeddings"
|
| 19 |
+
)
|
| 20 |
+
# 항상 [batch_size, embedding_dim] 형태 반환
|
| 21 |
+
vec = response[0]
|
| 22 |
return _l2_normalize(vec)
|
| 23 |
+
|
requirements.txt
CHANGED
|
@@ -6,11 +6,11 @@ python-multipart==0.0.9
|
|
| 6 |
|
| 7 |
# 벡터 검색/임베딩 관련
|
| 8 |
faiss-cpu==1.8.0
|
| 9 |
-
numpy
|
| 10 |
|
| 11 |
# 데이터 모델링
|
| 12 |
pydantic==2.11.5
|
| 13 |
|
| 14 |
# Hugging Face 관련
|
| 15 |
-
huggingface_hub
|
| 16 |
datasets==3.0.1
|
|
|
|
| 6 |
|
| 7 |
# 벡터 검색/임베딩 관련
|
| 8 |
faiss-cpu==1.8.0
|
| 9 |
+
numpy>=1.26,<2.0
|
| 10 |
|
| 11 |
# 데이터 모델링
|
| 12 |
pydantic==2.11.5
|
| 13 |
|
| 14 |
# Hugging Face 관련
|
| 15 |
+
huggingface_hub>=1.0.0
|
| 16 |
datasets==3.0.1
|