"""Minimal Hugging Face chat demo for a medical-AI course.

Sends one user message (plus a fixed system prompt) to a hosted model via
``InferenceClient`` and prints the reply. Set the ``HF_TOKEN`` environment
variable before running, or edit ``API_KEY`` below.
"""

import os
import sys

from huggingface_hub import InferenceClient

# 1) Put your token here or set HF_TOKEN in env.
API_KEY = os.getenv("HF_TOKEN", "hf_your_token_here")

# Warn early if the placeholder token is still in use — otherwise the
# request fails later with an opaque authentication error.
if API_KEY == "hf_your_token_here":
    print(
        "WARNING: HF_TOKEN is not set and API_KEY is still the placeholder; "
        "requests will fail with an auth error.",
        file=sys.stderr,
    )

client = InferenceClient(api_key=API_KEY)

SYSTEM_PROMPT = "You are a helpful assistant in a medical AI course. Answer clearly and safely."

# Generation parameters shared by every request.
PARAMS = {
    "max_tokens": 300,    # cap on generated tokens
    "temperature": 0.7,   # moderate randomness
    "top_p": 0.9,         # nucleus sampling cutoff
}


def simple_chat(user_message: str, model_name: str) -> str:
    """Send one chat turn to *model_name* and return the model's reply.

    Args:
        user_message: The question to ask. Blank/whitespace-only input is
            rejected without contacting the API.
        model_name: A model id served by HF Inference Providers,
            e.g. ``"HuggingFaceTB/SmolLM3-3B"``.

    Returns:
        The assistant's reply text, a prompt to ask a question for empty
        input, or — by design for this teaching script — the raw API error
        as a string so students can debug failures themselves.
    """
    if not user_message.strip():
        return "Please ask a question!"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_tokens=PARAMS["max_tokens"],
            temperature=PARAMS["temperature"],
            top_p=PARAMS["top_p"],
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: surface the real error so students can debug
        # (bad token, model not served, rate limit, ...) instead of crashing.
        return f"Raw error from Hugging Face:\n{repr(e)}"


if __name__ == "__main__":
    # Start with ONE model you know is served by Inference Providers:
    test_model = "HuggingFaceTB/SmolLM3-3B"
    print("Testing model:", test_model)
    print(simple_chat("Say hello in one sentence.", test_model))