Spaces:

minhvtt
/

ChatbotRAG

Sleeping

minhvtt committed 6 days ago

Commit

b4fb1fd

verified ·

1 Parent(s): 0e32cea

Update agent_service.py

Files changed (1) hide show

agent_service.py CHANGED Viewed

@@ -310,17 +310,14 @@ class AgentService:
         try:
             from huggingface_hub import AsyncInferenceClient
-            # Create async client with Sambanova provider for Llama 70B
-            # Sambanova hosts Llama models for free via HuggingFace
-            client = AsyncInferenceClient(
-                token=self.hf_token,
-                provider="sambanova"  # Use Sambanova for Llama 70B
-            )
             # Call HF API with chat completion and native tools
             response = await client.chat_completion(
                 messages=messages,
-                model="meta-llama/Llama-3.3-70B-Instruct",  # Llama 3.3 70B via Sambanova
                 max_tokens=512,
                 temperature=0.7,
                 tools=tools,

         try:
             from huggingface_hub import AsyncInferenceClient
+            # Create async client - Qwen2.5 works on default HuggingFace API
+            client = AsyncInferenceClient(token=self.hf_token)
             # Call HF API with chat completion and native tools
+            # Qwen2.5-72B-Instruct: Best for Vietnamese - state-of-the-art performance
             response = await client.chat_completion(
                 messages=messages,
+                model="Qwen/Qwen2.5-72B-Instruct",  # Best for Vietnamese + tool calling
                 max_tokens=512,
                 temperature=0.7,
                 tools=tools,