m97j committed on
Commit
307f6b8
·
1 Parent(s): 67a03d1

Initial codes commit

Browse files
Files changed (2) hide show
  1. modules/embedder.py +9 -3
  2. requirements.txt +2 -2
modules/embedder.py CHANGED
@@ -4,6 +4,7 @@ from typing import List
4
  from huggingface_hub import InferenceClient
5
  from config import EMBED_MODEL, HF_TOKEN
6
 
 
7
  _client = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN)
8
 
9
  def _l2_normalize(vec: List[float]) -> List[float]:
@@ -11,7 +12,12 @@ def _l2_normalize(vec: List[float]) -> List[float]:
11
  return [x / norm for x in vec]
12
 
13
  def get_embedding(text: str) -> List[float]:
14
- # embeddings()는 [batch_size, embedding_dim] 형태 반환
15
- embedding_2d = _client.embeddings(text)
16
- vec = embedding_2d[0] # 첫 번째 행이 입력 문장의 벡터
 
 
 
 
17
  return _l2_normalize(vec)
 
 
4
  from huggingface_hub import InferenceClient
5
  from config import EMBED_MODEL, HF_TOKEN
6
 
7
+ # 모델과 토큰 지정
8
  _client = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN)
9
 
10
  def _l2_normalize(vec: List[float]) -> List[float]:
 
12
  return [x / norm for x in vec]
13
 
14
  def get_embedding(text: str) -> List[float]:
15
+ # embeddings 태스크를 직접 지정
16
+ response = _client.post(
17
+ json={"inputs": text},
18
+ task="embeddings"
19
+ )
20
+ # 항상 [batch_size, embedding_dim] 형태 반환
21
+ vec = response[0]
22
  return _l2_normalize(vec)
23
+
requirements.txt CHANGED
@@ -6,11 +6,11 @@ python-multipart==0.0.9
6
 
7
  # 벡터 검색/임베딩 관련
8
  faiss-cpu==1.8.0
9
- numpy==1.26.4
10
 
11
  # 데이터 모델링
12
  pydantic==2.11.5
13
 
14
  # Hugging Face 관련
15
- huggingface_hub==0.24.6
16
  datasets==3.0.1
 
6
 
7
  # 벡터 검색/임베딩 관련
8
  faiss-cpu==1.8.0
9
+ numpy>=1.26,<2.0
10
 
11
  # 데이터 모델링
12
  pydantic==2.11.5
13
 
14
  # Hugging Face 관련
15
+ huggingface_hub>=1.0.0
16
  datasets==3.0.1