import gradio as gr
from huggingface_hub import InferenceClient

# Note: the access token has to be supplied some other way, e.g. through an
# environment variable or the Space settings. The client is created here
# without an explicit token (this only works if the model allows it).
client = InferenceClient(
    "mradermacher/airoboros_none_resp_gpt-4o-mini_inst_gpt-4o_resp-GGUF"
)

# Prompt label for each chat role; messages with any other role are skipped.
_ROLE_LABELS = {"system": "System", "user": "User", "assistant": "Assistant"}


def _build_prompt(chat_messages):
    """Flatten role/content message dicts into a plain-text prompt.

    Each message with a known role becomes one ``"<Label>: <content>\\n"``
    line. The prompt ends with ``"Assistant: "`` so the model continues
    with the assistant's reply.
    """
    parts = []
    for msg in chat_messages:
        label = _ROLE_LABELS.get(msg.get("role", ""))
        if label is not None:
            parts.append(f"{label}: {msg.get('content', '')}\n")
    parts.append("Assistant: ")
    return "".join(parts)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream the model's reply to ``message`` for ``gr.ChatInterface``.

    ``gr.ChatInterface`` calls its ``fn`` with the new user message, the
    chat history, and then one value per entry of ``additional_inputs``,
    so this signature must accept all six arguments.

    Args:
        message: The text the user just submitted.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts
            (the interface is created with ``type="messages"``).
        system_message: Text placed in the leading system message.
        max_tokens: Forwarded as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to the client.
        top_p: Nucleus-sampling value forwarded to the client.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    chat_messages = [
        {"role": "system", "content": system_message},
        *(history or []),
        # The current turn is not part of ``history``; add it explicitly.
        {"role": "user", "content": message},
    ]
    prompt = _build_prompt(chat_messages)

    generation = client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )

    response = ""
    # With ``stream=True`` and no ``details=True`` the client yields plain
    # strings, so each chunk is appended directly.
    for chunk in generation:
        response += chunk
        yield response


demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    type="messages",
)


if __name__ == "__main__":
    demo.launch()