"""Minimal Hugging Face chat demo for a medical-AI course.

Sends one user message (plus a fixed system prompt) to a hosted model via
``InferenceClient`` and prints the reply. Set the ``HF_TOKEN`` environment
variable before running, or edit ``API_KEY`` below.
"""

import os
import sys

from huggingface_hub import InferenceClient

# 1) Put your token here or set HF_TOKEN in env.
API_KEY = os.getenv("HF_TOKEN", "hf_your_token_here")

# Warn early if the placeholder token is still in use — otherwise the
# request fails later with an opaque authentication error.
if API_KEY == "hf_your_token_here":
    print(
        "WARNING: HF_TOKEN is not set and API_KEY is still the placeholder; "
        "requests will fail with an auth error.",
        file=sys.stderr,
    )

client = InferenceClient(api_key=API_KEY)

SYSTEM_PROMPT = "You are a helpful assistant in a medical AI course. Answer clearly and safely."

# Generation parameters shared by every request.
PARAMS = {
    "max_tokens": 300,    # cap on generated tokens
    "temperature": 0.7,   # moderate randomness
    "top_p": 0.9,         # nucleus sampling cutoff
}


def simple_chat(user_message: str, model_name: str) -> str:
    """Send one chat turn to *model_name* and return the model's reply.

    Args:
        user_message: The question to ask. Blank/whitespace-only input is
            rejected without contacting the API.
        model_name: A model id served by HF Inference Providers,
            e.g. ``"HuggingFaceTB/SmolLM3-3B"``.

    Returns:
        The assistant's reply text, a prompt to ask a question for empty
        input, or — by design for this teaching script — the raw API error
        as a string so students can debug failures themselves.
    """
    if not user_message.strip():
        return "Please ask a question!"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_tokens=PARAMS["max_tokens"],
            temperature=PARAMS["temperature"],
            top_p=PARAMS["top_p"],
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: surface the real error so students can debug
        # (bad token, model not served, rate limit, ...) instead of crashing.
        return f"Raw error from Hugging Face:\n{repr(e)}"


if __name__ == "__main__":
    # Start with ONE model you know is served by Inference Providers:
    test_model = "HuggingFaceTB/SmolLM3-3B"
    print("Testing model:", test_model)
    print(simple_chat("Say hello in one sentence.", test_model))