VietCat committed on
Commit
b249c92
·
1 Parent(s): 9392b41

fix duplicate message

Browse files
Files changed (3) hide show
  1. app/embedding.py +11 -37
  2. app/llm.py +43 -108
  3. app/utils.py +28 -1
app/embedding.py CHANGED
@@ -2,9 +2,8 @@ from typing import List
2
  import numpy as np
3
  from loguru import logger
4
  import httpx
5
- from tenacity import retry, stop_after_attempt, wait_exponential
6
 
7
- from .utils import timing_decorator_async, timing_decorator_sync
8
 
9
  class EmbeddingClient:
10
  def __init__(self):
@@ -16,52 +15,27 @@ class EmbeddingClient:
16
  self._client = httpx.AsyncClient()
17
 
18
  @timing_decorator_async
19
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
20
  async def create_embedding(self, text: str) -> List[float]:
21
  """
22
  Tạo embedding vector từ text bằng dịch vụ embedding (ví dụ OpenAI).
23
  Input: text (str)
24
  Output: list[float] embedding vector.
25
  """
 
 
26
  try:
27
- # This is a placeholder for your actual embedding service
28
- # You should replace this with your preferred embedding service (e.g., OpenAI, Cohere, etc.)
29
- # For example, using OpenAI's embedding API:
30
-
31
- # response = await self._client.post(
32
- # "https://api.openai.com/v1/embeddings",
33
- # headers={"Authorization": f"Bearer {api_key}"},
34
- # json={
35
- # "input": text,
36
- # "model": "text-embedding-ada-002"
37
- # }
38
- # )
39
- # embedding = response.json()["data"][0]["embedding"]
40
-
41
- # For now, we'll return a random vector as a placeholder
42
- # embedding = np.random.normal(0, 1, 1536).tolist() # 1536 is OpenAI's embedding dimension
43
- # return embedding
44
-
45
- logger.info(f"[DEBUG] Bắt đầu tạo embedding: {text}")
46
- response = await self._client.post(
47
- # "https://vietcat-bgem3node.hf.space/embed",
48
- "https://vietcat-vietnameseembeddingv2.hf.space/embed",
49
- json={"text": text}
50
- )
51
- response.raise_for_status()
52
- data = response.json()
53
- logger.info(f"[DEBUG] Embedding API response: {data['embedding'][:10]}...{data['embedding'][-10:]}")
54
- return data["embedding"]
55
  except Exception as e:
56
  logger.error(f"Error creating embedding: {e}")
57
- if 'response' in locals():
58
- try:
59
- logger.error(f"Embedding API status: {response.status_code}, content: {await response.aread()}")
60
- except Exception as ex:
61
- logger.error(f"Error reading response content: {ex}")
62
  raise
63
 
64
- @timing_decorator_sync
65
  def cosine_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
66
  """
67
  Tính cosine similarity giữa hai embedding.
 
2
  import numpy as np
3
  from loguru import logger
4
  import httpx
 
5
 
6
+ from .utils import timing_decorator_async, timing_decorator_sync, call_endpoint_with_retry
7
 
8
  class EmbeddingClient:
9
  def __init__(self):
 
15
  self._client = httpx.AsyncClient()
16
 
17
  @timing_decorator_async
 
18
  async def create_embedding(self, text: str) -> List[float]:
19
  """
20
  Tạo embedding vector từ text bằng dịch vụ embedding (ví dụ OpenAI).
21
  Input: text (str)
22
  Output: list[float] embedding vector.
23
  """
24
+ url = "https://vietcat-vietnameseembeddingv2.hf.space/embed"
25
+ payload = {"text": text}
26
  try:
27
+ response = await call_endpoint_with_retry(self._client, url, payload)
28
+ if response is not None:
29
+ data = response.json()
30
+ logger.info(f"[DEBUG] Embedding API response: {data['embedding'][:10]}...{data['embedding'][-10:]}")
31
+ return data["embedding"]
32
+ else:
33
+ logger.error("Embedding API response is None")
34
+ raise RuntimeError("Embedding API response is None")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  except Exception as e:
36
  logger.error(f"Error creating embedding: {e}")
 
 
 
 
 
37
  raise
38
 
 
39
  def cosine_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
40
  """
41
  Tính cosine similarity giữa hai embedding.
app/llm.py CHANGED
@@ -5,7 +5,7 @@ from loguru import logger
5
  from tenacity import retry, stop_after_attempt, wait_exponential
6
  import os
7
 
8
- from .utils import timing_decorator_async, timing_decorator_sync
9
 
10
  class LLMClient:
11
  """
@@ -118,125 +118,60 @@ class LLMClient:
118
  logger.error(f"[LLM] Error generating text with {self.provider}: {e}")
119
  raise
120
 
121
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
122
  async def _generate_openai(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> str:
123
- """Generate text với OpenAI API."""
124
- messages = []
125
-
126
- if system_prompt:
127
- messages.append({"role": "system", "content": system_prompt})
128
-
129
- messages.append({"role": "user", "content": prompt})
130
-
131
- payload = {
132
- "model": kwargs.get("model", self.model),
133
- "messages": messages,
134
- "max_tokens": kwargs.get("max_tokens", self.max_tokens),
135
- "temperature": kwargs.get("temperature", self.temperature),
136
- "stream": False
137
- }
138
-
139
- headers = {
140
- "Authorization": f"Bearer {self.api_key}",
141
- "Content-Type": "application/json"
142
- }
143
-
144
- response = await self._client.post(
145
- f"{self.base_url}/chat/completions",
146
- headers=headers,
147
- json=payload
148
- )
149
- response.raise_for_status()
150
-
151
- data = response.json()
152
- return data["choices"][0]["message"]["content"]
153
 
154
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
155
  async def _generate_huggingface(self, prompt: str, **kwargs) -> str:
156
- """Generate text với HuggingFace API."""
157
- payload = {
158
- "inputs": prompt,
159
- "parameters": {
160
- "max_new_tokens": kwargs.get("max_tokens", self.max_tokens),
161
- "temperature": kwargs.get("temperature", self.temperature),
162
- "return_full_text": False
163
- }
164
- }
165
-
166
- headers = {
167
- "Authorization": f"Bearer {self.api_key}",
168
- "Content-Type": "application/json"
169
- }
170
-
171
- response = await self._client.post(
172
- f"{self.base_url}/models/{self.model}",
173
- headers=headers,
174
- json=payload
175
- )
176
- response.raise_for_status()
177
-
178
- data = response.json()
179
- return data[0]["generated_text"]
180
 
181
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
182
  async def _generate_local(self, prompt: str, **kwargs) -> str:
183
- """Generate text với local model."""
184
- payload = {
185
- "prompt": prompt,
186
- "max_tokens": kwargs.get("max_tokens", self.max_tokens),
187
- "temperature": kwargs.get("temperature", self.temperature),
188
- "model": kwargs.get("model", self.model)
189
- }
190
-
191
- response = await self._client.post(
192
- f"{self.base_url}/generate",
193
- json=payload
194
- )
195
- response.raise_for_status()
196
-
197
- data = response.json()
198
- return data.get("text", "")
199
 
200
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
201
  async def _generate_custom(self, prompt: str, **kwargs) -> str:
202
- """Generate text với custom provider."""
203
- payload = {
204
- "prompt": prompt,
205
- "max_tokens": kwargs.get("max_tokens", self.max_tokens),
206
- "temperature": kwargs.get("temperature", self.temperature),
207
- "model": kwargs.get("model", self.model)
208
- }
209
-
210
- headers = {}
211
- if self.api_key:
212
- headers["Authorization"] = f"Bearer {self.api_key}"
213
-
214
- response = await self._client.post(
215
- f"{self.base_url}/generate",
216
- headers=headers,
217
- json=payload
218
- )
219
- response.raise_for_status()
220
-
221
- data = response.json()
222
- return data.get("text", "")
223
 
224
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
225
  async def _generate_hfs(self, prompt: str, **kwargs) -> str:
226
- """Generate text với HFS provider."""
227
  endpoint = f"{self.base_url}/purechat"
228
  payload = {"prompt": prompt}
229
- logger.info(f"[LLM][FLOW] Chuẩn bị gửi request tới HFS endpoint: {endpoint}")
230
- logger.info(f"[LLM][FLOW] Payload gửi đi: {payload}")
231
  headers = {}
232
  if self.api_key:
233
  headers["Authorization"] = f"Bearer {self.api_key}"
234
- try:
235
- response = await self._client.post(endpoint, headers=headers, json=payload)
236
- logger.info(f"[LLM][FLOW] Đã nhận response từ HFS, status: {response.status_code}")
237
- response.raise_for_status()
238
  data = response.json()
239
- logger.info(f"[LLM][FLOW] Response data: {data}")
240
  if 'response' in data:
241
  return data['response']
242
  elif 'result' in data:
@@ -244,9 +179,9 @@ class LLMClient:
244
  elif 'data' in data and isinstance(data['data'], list):
245
  return data['data'][0]
246
  return str(data)
247
- except Exception as e:
248
- logger.error(f"[LLM][FLOW][ERROR] Lỗi khi gọi HFS endpoint: {endpoint} | Exception: {e}")
249
- raise
250
 
251
  @timing_decorator_async
252
  async def chat(
 
5
  from tenacity import retry, stop_after_attempt, wait_exponential
6
  import os
7
 
8
+ from .utils import timing_decorator_async, timing_decorator_sync, call_endpoint_with_retry
9
 
10
  class LLMClient:
11
  """
 
118
  logger.error(f"[LLM] Error generating text with {self.provider}: {e}")
119
  raise
120
 
 
121
  async def _generate_openai(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> str:
122
+ url = f"{self.base_url}/chat/completions"
123
+ payload = {"model": kwargs.get("model", self.model), "messages": [{"role": "system", "content": system_prompt or ""}, {"role": "user", "content": prompt}], "max_tokens": kwargs.get("max_tokens", self.max_tokens), "temperature": kwargs.get("temperature", self.temperature), "stream": False}
124
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
125
+ response = await call_endpoint_with_retry(self._client, url, payload, headers=headers)
126
+ if response is not None:
127
+ data = response.json()
128
+ return data["choices"][0]["message"]["content"]
129
+ else:
130
+ logger.error("OpenAI API response is None")
131
+ raise RuntimeError("OpenAI API response is None")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
 
133
  async def _generate_huggingface(self, prompt: str, **kwargs) -> str:
134
+ url = f"{self.base_url}/generate"
135
+ payload = {"inputs": prompt}
136
+ response = await call_endpoint_with_retry(self._client, url, payload)
137
+ if response is not None:
138
+ data = response.json()
139
+ return data[0]["generated_text"]
140
+ else:
141
+ logger.error("HuggingFace API response is None")
142
+ raise RuntimeError("HuggingFace API response is None")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
 
144
  async def _generate_local(self, prompt: str, **kwargs) -> str:
145
+ url = f"{self.base_url}/generate"
146
+ payload = {"prompt": prompt}
147
+ response = await call_endpoint_with_retry(self._client, url, payload)
148
+ if response is not None:
149
+ data = response.json()
150
+ return data.get("text", "")
151
+ else:
152
+ logger.error("Local API response is None")
153
+ raise RuntimeError("Local API response is None")
 
 
 
 
 
 
 
154
 
 
155
  async def _generate_custom(self, prompt: str, **kwargs) -> str:
156
+ url = f"{self.base_url}/custom"
157
+ payload = {"prompt": prompt}
158
+ response = await call_endpoint_with_retry(self._client, url, payload)
159
+ if response is not None:
160
+ data = response.json()
161
+ return data.get("text", "")
162
+ else:
163
+ logger.error("Custom API response is None")
164
+ raise RuntimeError("Custom API response is None")
 
 
 
 
 
 
 
 
 
 
 
 
165
 
 
166
  async def _generate_hfs(self, prompt: str, **kwargs) -> str:
 
167
  endpoint = f"{self.base_url}/purechat"
168
  payload = {"prompt": prompt}
 
 
169
  headers = {}
170
  if self.api_key:
171
  headers["Authorization"] = f"Bearer {self.api_key}"
172
+ response = await call_endpoint_with_retry(self._client, endpoint, payload, 3, 300, headers=headers)
173
+ if response is not None:
 
 
174
  data = response.json()
 
175
  if 'response' in data:
176
  return data['response']
177
  elif 'result' in data:
 
179
  elif 'data' in data and isinstance(data['data'], list):
180
  return data['data'][0]
181
  return str(data)
182
+ else:
183
+ logger.error("HFS API response is None")
184
+ raise RuntimeError("HFS API response is None")
185
 
186
  @timing_decorator_async
187
  async def chat(
app/utils.py CHANGED
@@ -3,6 +3,8 @@ from functools import wraps
3
  from loguru import logger
4
  from typing import Any, Callable
5
  import os
 
 
6
 
7
  def timing_decorator_async(func: Callable) -> Callable:
8
  """
@@ -98,4 +100,29 @@ def validate_config(settings) -> None:
98
  if not getattr(settings, field, None):
99
  missing.append(field)
100
  if missing:
101
- raise RuntimeError(f"Missing config: {', '.join(missing)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from loguru import logger
4
  from typing import Any, Callable
5
  import os
6
+ import asyncio
7
+ import httpx
8
 
9
  def timing_decorator_async(func: Callable) -> Callable:
10
  """
 
100
  if not getattr(settings, field, None):
101
  missing.append(field)
102
  if missing:
103
+ raise RuntimeError(f"Missing config: {', '.join(missing)}")
104
+
105
def get_logger():
    """Return the shared loguru logger used by the retry helper."""
    return logger
107
+
108
async def call_endpoint_with_retry(client, url, payload, max_retries=3, base_timeout=30, headers=None):
    """POST *payload* as JSON to *url*, retrying timeouts with a growing timeout.

    Args:
        client: An ``httpx.AsyncClient`` (anything exposing an async ``post``).
        url: Endpoint URL.
        payload: JSON-serializable request body.
        max_retries: Maximum number of attempts (default 3).
        base_timeout: Timeout in seconds for the first attempt; doubled after
            each timeout.
        headers: Optional HTTP headers dict.

    Returns:
        The successful ``httpx.Response``.

    Raises:
        httpx.TimeoutException: When every attempt timed out.
        httpx.HTTPStatusError: On a non-2xx response (not retried).
        Exception: Any other transport error, logged and re-raised.

    NOTE(review): retrying a POST assumes the endpoint is idempotent — a
    timed-out request may still have been processed server-side; confirm this
    cannot itself produce duplicate messages.
    """
    logger = get_logger()
    timeout = base_timeout
    for attempt in range(1, max_retries + 1):
        try:
            response = await client.post(url, json=payload, timeout=timeout, headers=headers)
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            if attempt == max_retries:
                # Consistent with the other handlers: log context before giving up.
                logger.error(f"Timeout after {max_retries} attempts calling {url}")
                raise
            logger.warning(f"Timeout (attempt {attempt}/{max_retries}), retrying with timeout={timeout * 2}s...")
            timeout *= 2
            await asyncio.sleep(1)
        except httpx.HTTPStatusError as e:
            # HTTP errors are not retried; surface status and body for debugging.
            logger.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            logger.error(f"Other error: {e}")
            raise