minhvtt committed on
Commit 40a1ee2 (verified)
1 Parent(s): 51abb53

Update agent_service.py

Files changed (1)
  agent_service.py: +8 -10
agent_service.py CHANGED
@@ -248,27 +248,25 @@ Example:
 
     async def _call_llm(self, messages: List[Dict]) -> str:
         """
-        Call HuggingFace LLM directly using InferenceClient
+        Call HuggingFace LLM directly using chat_completion (conversational)
         """
         try:
             from huggingface_hub import AsyncInferenceClient
 
-            # Build prompt from messages
-            prompt = self._messages_to_prompt(messages)
-
             # Create async client
             client = AsyncInferenceClient(token=self.hf_token)
 
-            # Call HF API
+            # Call HF API with chat completion (conversational)
             response_text = ""
-            async for token in await client.text_generation(
-                prompt=prompt,
-                model="openai/gpt-oss-20b",
-                max_new_tokens=512,
+            async for message in await client.chat_completion(
+                messages=messages,  # Use messages directly
+                model="meta-llama/Llama-3.3-70B-Instruct",
+                max_tokens=512,
                 temperature=0.7,
                 stream=True
             ):
-                response_text += token
+                if message.choices and message.choices[0].delta.content:
+                    response_text += message.choices[0].delta.content
 
             return response_text
         except Exception as e:
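
For reference, below is a minimal standalone sketch of the streaming chat_completion pattern this commit switches to. It assumes a valid token in an HF_TOKEN environment variable and access to the Llama-3.3-70B-Instruct endpoint; the main() harness and the sample prompt are illustrative and are not part of agent_service.py, which instead reads the token from self.hf_token.

import asyncio
import os

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    # Hypothetical harness: agent_service.py uses self.hf_token here.
    client = AsyncInferenceClient(token=os.environ["HF_TOKEN"])
    messages = [{"role": "user", "content": "Say hello in one sentence."}]

    response_text = ""
    # With stream=True, chat_completion yields ChatCompletionStreamOutput
    # chunks; each chunk carries its text in choices[0].delta.content.
    async for chunk in await client.chat_completion(
        messages=messages,
        model="meta-llama/Llama-3.3-70B-Instruct",
        max_tokens=512,
        temperature=0.7,
        stream=True,
    ):
        if chunk.choices and chunk.choices[0].delta.content:
            response_text += chunk.choices[0].delta.content

    print(response_text)


if __name__ == "__main__":
    asyncio.run(main())

Passing the messages list straight through lets the endpoint apply the model's own chat template server-side, which is presumably what the removed _messages_to_prompt helper approximated by hand; the delta-content guard skips chunks (such as the final one) that carry no text.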