minhvtt committed on
Commit 40a1ee2 (verified)
1 Parent(s): 51abb53

Update agent_service.py

Files changed (1)
  agent_service.py: +8 -10
agent_service.py CHANGED
@@ -248,27 +248,25 @@ Example:
 
     async def _call_llm(self, messages: List[Dict]) -> str:
         """
-        Call HuggingFace LLM directly using InferenceClient
+        Call HuggingFace LLM directly using chat_completion (conversational)
         """
         try:
             from huggingface_hub import AsyncInferenceClient
 
-            # Build prompt from messages
-            prompt = self._messages_to_prompt(messages)
-
             # Create async client
             client = AsyncInferenceClient(token=self.hf_token)
 
-            # Call HF API
+            # Call HF API with chat completion (conversational)
             response_text = ""
-            async for token in await client.text_generation(
-                prompt=prompt,
-                model="openai/gpt-oss-20b",
-                max_new_tokens=512,
+            async for message in await client.chat_completion(
+                messages=messages,  # Use messages directly
+                model="meta-llama/Llama-3.3-70B-Instruct",
+                max_tokens=512,
                 temperature=0.7,
                 stream=True
             ):
-                response_text += token
+                if message.choices and message.choices[0].delta.content:
+                    response_text += message.choices[0].delta.content
 
             return response_text
         except Exception as e:
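
For reference, below is a minimal standalone sketch of the streaming chat_completion pattern this commit switches to. It assumes a valid token in an HF_TOKEN environment variable and access to the Llama-3.3-70B-Instruct endpoint; the main() harness and the sample prompt are illustrative and are not part of agent_service.py, which instead reads the token from self.hf_token.

import asyncio
import os

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    # Hypothetical harness: agent_service.py uses self.hf_token here.
    client = AsyncInferenceClient(token=os.environ["HF_TOKEN"])
    messages = [{"role": "user", "content": "Say hello in one sentence."}]

    response_text = ""
    # With stream=True, chat_completion yields ChatCompletionStreamOutput
    # chunks; each chunk carries its text in choices[0].delta.content.
    async for chunk in await client.chat_completion(
        messages=messages,
        model="meta-llama/Llama-3.3-70B-Instruct",
        max_tokens=512,
        temperature=0.7,
        stream=True,
    ):
        if chunk.choices and chunk.choices[0].delta.content:
            response_text += chunk.choices[0].delta.content

    print(response_text)


if __name__ == "__main__":
    asyncio.run(main())

Passing the messages list straight through lets the endpoint apply the model's own chat template server-side, which is presumably what the removed _messages_to_prompt helper approximated by hand; the delta-content guard skips chunks (such as the final one) that carry no text.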