Jake-seong committed on
Commit
54857b3
·
verified ·
1 Parent(s): c1996ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -32,21 +32,23 @@ def get_db_conn():
32
  def get_embedding(text: str) -> List[float]:
33
  """
34
  ํ…์ŠคํŠธ๋ฅผ OpenAI์˜ text-embedding-3-small ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ ์ž„๋ฒ ๋”ฉ ๋ฒกํ„ฐ๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
 
35
 
36
  Args:
37
  text (str): 임베딩할 텍스트
38
 
39
  Returns:
40
- List[float]: 임베딩 벡터
41
  """
42
  try:
43
  response = client.embeddings.create(
44
  input=text,
45
  model="text-embedding-3-small",
46
- encoding_format="float", # 명시적으로 float 형식 지정
47
- dimensions=1536 # 차원 수 명시
48
  )
49
- return response.data[0].embedding
 
50
  except Exception as e:
51
  print(f"임베딩 생성 중 오류 발생: {str(e)}")
52
  raise
@@ -54,8 +56,16 @@ def get_embedding(text: str) -> List[float]:
54
  def format_vector_for_pg(vector: List[float]) -> str:
55
  """
56
  임베딩 벡터를 PostgreSQL 포맷으로 변환합니다.
57
- ์ž๋ฐ”์˜ formatVectorForPg() ๋ฉ”์†Œ๋“œ์™€ ๋™์ผํ•œ ๊ธฐ๋Šฅ์ž…๋‹ˆ๋‹ค.
 
58
  """
 
 
 
 
 
 
 
59
  # ์ž๋ฐ” ๊ตฌํ˜„๊ณผ ๋™์ผํ•˜๊ฒŒ StringBuilder ๋ฐฉ์‹์œผ๋กœ ๊ตฌํ˜„
60
  vector_str = ','.join([f"{x}" for x in vector])
61
  return f"[{vector_str}]"
 
32
  def get_embedding(text: str) -> List[float]:
33
  """
34
  ํ…์ŠคํŠธ๋ฅผ OpenAI์˜ text-embedding-3-small ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ ์ž„๋ฒ ๋”ฉ ๋ฒกํ„ฐ๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
35
+ Java์˜ float[](float32)์™€ ํ˜ธํ™˜๋˜๋„๋ก ๋ช…์‹œ์ ์œผ๋กœ float32๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
36
 
37
  Args:
38
  text (str): 임베딩할 텍스트
39
 
40
  Returns:
41
+ List[float]: 임베딩 벡터 (float32)
42
  """
43
  try:
44
  response = client.embeddings.create(
45
  input=text,
46
  model="text-embedding-3-small",
47
+ encoding_format="float",
48
+ dimensions=1536
49
  )
50
+ # ๋ช…์‹œ์ ์œผ๋กœ float32๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ Java์˜ float[]์™€ ํ˜ธํ™˜๋˜๊ฒŒ ํ•จ
51
+ return np.array(response.data[0].embedding, dtype=np.float32).tolist()
52
  except Exception as e:
53
  print(f"임베딩 생성 중 오류 발생: {str(e)}")
54
  raise
 
56
  def format_vector_for_pg(vector: List[float]) -> str:
57
  """
58
  임베딩 벡터를 PostgreSQL 포맷으로 변환합니다.
59
+ Java์˜ formatVectorForPg() ๋ฉ”์†Œ๋“œ์™€ ๋™์ผํ•œ ๊ธฐ๋Šฅ์ž…๋‹ˆ๋‹ค.
60
+ 입력된 벡터가 float32 타입인지 확인합니다.
61
  """
62
+ # 벡터가 float32 타입인지 확인하고, 아니면 변환
63
+ # NumPy 배열이 아닌 경우에도 처리
64
+ if not isinstance(vector, np.ndarray):
65
+ vector = np.array(vector, dtype=np.float32)
66
+ elif vector.dtype != np.float32:
67
+ vector = vector.astype(np.float32)
68
+
69
  # ์ž๋ฐ” ๊ตฌํ˜„๊ณผ ๋™์ผํ•˜๊ฒŒ StringBuilder ๋ฐฉ์‹์œผ๋กœ ๊ตฌํ˜„
70
  vector_str = ','.join([f"{x}" for x in vector])
71
  return f"[{vector_str}]"