import gradio as gr
import os
from huggingface_hub import InferenceClient

# Get token from environment variable for security
# In HuggingFace Spaces, set this in the Settings tab
HF_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')

# Initialize the HuggingFace Inference Client
client = InferenceClient(token=HF_TOKEN)


def chatbot_hf(question, temperature=0.7, model='google/gemma-2-2b-it'):
    # Send the question to the selected HuggingFace model
    response = client.chat_completion(
        model=model,
        messages=[{"role": "user", "content": question}],
        temperature=temperature,
        max_tokens=500  # Maximum length of response
    )
    # Extract and return the response text
    return response.choices[0].message.content


def main():
    # Define available models
    AVAILABLE_MODELS = [
        "google/gemma-2-2b-it",
        "meta-llama/Llama-2-7b-chat-hf",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "HuggingFaceH4/zephyr-7b-beta",
    ]

    # Create the Gradio interface with a more polished layout
    demo = gr.Interface(
        fn=chatbot_hf,
        inputs=[
            gr.Textbox(
                label="Your Question",
                lines=2,
                placeholder="Type your message here...",
                scale=3,
            ),
            gr.Slider(
                label="Temperature",
                minimum=0.0,
                maximum=1.0,
                step=0.01,
                value=0.7,
                info="Higher values make output more random, lower values more focused",
            ),
            gr.Dropdown(
                label="Select Model",
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],
                info="Choose the AI model to chat with",
            ),
        ],
        outputs=gr.Textbox(label="AI Response", lines=20),
        title="🤖 HuggingFace Chat Interface",
        description="""
        Chat with various large language models hosted on HuggingFace.
        Adjust the temperature to control response creativity.
        """,
        article="""
        ### Tips
        - For factual responses, use a lower temperature (0.1-0.3)
        - For creative writing, use a higher temperature (0.7-0.9)
        - Different models may have different strengths
        """,
    )

    demo.launch()


if __name__ == "__main__":
    main()
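
# ---------------------------------------------------------------------------
# Usage sketch for running locally (the filename app.py and the token value
# below are illustrative assumptions, not part of the script above):
#
#   export HUGGINGFACE_TOKEN="hf_..."   # your HuggingFace access token
#   python app.py
#
# Gradio then serves the interface locally (by default at
# http://127.0.0.1:7860) and prints the URL to the console. On HuggingFace
# Spaces, set HUGGINGFACE_TOKEN as a secret in the Space's Settings tab
# instead of exporting it in a shell.
# ---------------------------------------------------------------------------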