import os

import gradio as gr
from huggingface_hub import InferenceClient

# Load HF token securely from environment
HF_TOKEN = os.getenv("HF_TOKEN")

# Model you want to use (hosted on Hugging Face)
MODEL_NAME = "mistralai/Voxtral-Small-24B-2507"

# Create the inference client
client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN)


# Function to generate text
def chat_with_model(prompt):
    if not prompt.strip():
        return "Please enter a message."
    try:
        response = client.text_generation(
            prompt,
            max_new_tokens=200,
            temperature=0.7,
        )
        return response
    except Exception as e:
        return f"⚠️ Error: {str(e)}"


# Gradio UI
interface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="Your Message", placeholder="Type your question here..."),
    outputs=gr.Textbox(label="Model Response"),
    title="Voxtral-Small-24B-2507 Chatbot",
    description="Chat live with the Mistral Voxtral Small 24B model via Hugging Face Inference API.",
)

# Launch the app
interface.launch()
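

# Optional variant (a sketch, not part of the original app): many instruction-tuned
# models on the serverless Inference API are exposed through the chat-completion
# task rather than raw text generation. If client.text_generation() reports an
# unsupported task for this model, a handler like the one below, using
# client.chat_completion(), may work instead. To use it, define it above the
# Gradio UI and pass it as fn= in place of chat_with_model.
def chat_with_model_chat_api(prompt):
    if not prompt.strip():
        return "Please enter a message."
    try:
        # Send the prompt as a single user message to the chat-completion endpoint
        completion = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200,
            temperature=0.7,
        )
        # Extract the assistant's reply text from the first choice
        return completion.choices[0].message.content
    except Exception as e:
        return f"⚠️ Error: {str(e)}"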