File size: 1,324 Bytes
e2a6bfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
from huggingface_hub import InferenceClient
import os
# 1) Put your token here or set HF_TOKEN in env
# NOTE: the second argument is a placeholder fallback used when HF_TOKEN is unset;
# requests made with it will fail authentication.
API_KEY = os.getenv("HF_TOKEN", "hf_your_token_here")
# Single shared client instance used by all chat calls in this script.
client = InferenceClient(api_key=API_KEY)
# System message prepended to every conversation sent to the model.
SYSTEM_PROMPT = "You are a helpful assistant in a medical AI course. Answer clearly and safely."
# Default sampling parameters forwarded to chat.completions.create().
PARAMS: dict = {
"max_tokens": 300,
"temperature": 0.7,
"top_p": 0.9,
}
def simple_chat(user_message: str, model_name: str):
    """Send one chat turn to *model_name* and return the model's reply text.

    Returns a friendly prompt string for blank input, and the raw error
    representation if the API call fails (so learners can debug).
    """
    # Guard clause: don't spend an API call on empty/whitespace-only input.
    if not user_message.strip():
        return "Please ask a question!"

    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]
    try:
        # PARAMS supplies max_tokens / temperature / top_p as keyword args.
        response = client.chat.completions.create(
            model=model_name,
            messages=conversation,
            **PARAMS,
        )
        # Accessing the reply stays inside `try` so malformed responses are
        # also surfaced through the debug branch below.
        return response.choices[0].message.content
    except Exception as e:
        # Surface the real error so you can debug
        return f"Raw error from Hugging Face:\n{repr(e)}"
if __name__ == "__main__":
    # Start with ONE model you know is served by Inference Providers:
    demo_model = "HuggingFaceTB/SmolLM3-3B"
    print(f"Testing model: {demo_model}")
    print(simple_chat("Say hello in one sentence.", demo_model))
|