Spaces:
Sleeping
Sleeping
Update agent_service.py
Browse files
- agent_service.py +4 -7
agent_service.py
CHANGED
|
@@ -310,17 +310,14 @@ class AgentService:
|
|
| 310 |
try:
|
| 311 |
from huggingface_hub import AsyncInferenceClient
|
| 312 |
|
| 313 |
-
# Create async client
|
| 314 |
-
|
| 315 |
-
client = AsyncInferenceClient(
|
| 316 |
-
token=self.hf_token,
|
| 317 |
-
provider="sambanova" # Use Sambanova for Llama 70B
|
| 318 |
-
)
|
| 319 |
|
| 320 |
# Call HF API with chat completion and native tools
|
|
|
|
| 321 |
response = await client.chat_completion(
|
| 322 |
messages=messages,
|
| 323 |
-
model="
|
| 324 |
max_tokens=512,
|
| 325 |
temperature=0.7,
|
| 326 |
tools=tools,
|
|
|
|
| 310 |
try:
|
| 311 |
from huggingface_hub import AsyncInferenceClient
|
| 312 |
|
| 313 |
+
# Create async client - Qwen2.5 works on default HuggingFace API
|
| 314 |
+
client = AsyncInferenceClient(token=self.hf_token)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
# Call HF API with chat completion and native tools
|
| 317 |
+
# Qwen2.5-72B-Instruct: Best for Vietnamese - state-of-the-art performance
|
| 318 |
response = await client.chat_completion(
|
| 319 |
messages=messages,
|
| 320 |
+
model="Qwen/Qwen2.5-72B-Instruct", # Best for Vietnamese + tool calling
|
| 321 |
max_tokens=512,
|
| 322 |
temperature=0.7,
|
| 323 |
tools=tools,
|