import gradio as gr
from huggingface_hub import InferenceClient


def _build_messages(system_message, history, message):
    """Return the chat-completion message list for one request.

    The list starts with the system prompt, continues with the prior turns
    from *history*, and ends with the new user *message*.

    *history* entries may be either ``[user_msg, bot_msg]`` pairs or
    ``{"role": ..., "content": ...}`` dicts. Entries (or halves of a pair)
    whose content is empty or ``None`` are skipped so that no message with
    empty content is sent to the model.
    """
    messages = [{"role": "system", "content": system_message}]

    for entry in history or []:
        if isinstance(entry, dict):
            # Messages-style history: forward only the role and content keys.
            role, content = entry.get("role"), entry.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue

        # Pair-style history: either half may be missing (e.g. no reply yet).
        user_msg, bot_msg = entry
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    # The new user message always goes last.
    messages.append({"role": "user", "content": message})
    return messages


def respond(
    message,
    history: list[list[str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a chat reply from the DeepHat/DeepHat-V1-7B model.

    Args:
        message: The new user message to answer.
        history: Previous turns, as ``[user_msg, bot_msg]`` pairs or as
            ``{"role", "content"}`` dicts.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.
        hf_token: Object whose ``token`` attribute is used to authenticate
            the ``InferenceClient``.

    Yields:
        The reply text accumulated so far, once per streamed chunk.
    """
    client = InferenceClient(
        token=hf_token.token,
        model="DeepHat/DeepHat-V1-7B",
    )

    messages = _build_messages(system_message, history, message)

    response = ""
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        # A chunk may carry no choices or an empty delta; it adds no text.
        delta = choices[0].delta if choices else None
        if delta and delta.content:
            response += delta.content
        yield response