File size: 1,324 Bytes
e2a6bfc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from huggingface_hub import InferenceClient
import os

# Auth: read the Hugging Face token from the environment, falling back to a
# placeholder the student is expected to replace.
API_KEY = os.getenv("HF_TOKEN", "hf_your_token_here")

# One shared client for all requests in this script.
client = InferenceClient(api_key=API_KEY)

# System instruction prepended to every conversation.
SYSTEM_PROMPT = "You are a helpful assistant in a medical AI course. Answer clearly and safely."

# Sampling configuration shared by every chat-completion call.
PARAMS = dict(
    max_tokens=300,
    temperature=0.7,
    top_p=0.9,
)

def simple_chat(user_message: str, model_name: str) -> str:
    """Send one user message to *model_name* and return the assistant's reply.

    Builds a two-message conversation (system prompt + user message) and calls
    the Hugging Face chat-completions endpoint with the shared PARAMS sampling
    settings.

    Args:
        user_message: The question to ask; blank/whitespace-only input is
            rejected with a friendly prompt instead of an API call.
        model_name: Model id served by Hugging Face Inference Providers,
            e.g. "HuggingFaceTB/SmolLM3-3B".

    Returns:
        The model's reply text, or a human-readable error string (this
        teaching script never raises to the caller).
    """
    if not user_message.strip():
        return "Please ask a question!"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            # Unpack the shared config so this call automatically stays in
            # sync if PARAMS gains or changes a setting.
            **PARAMS,
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Broad catch is deliberate for this course script: surface the raw
        # provider error so students can see exactly what went wrong.
        return f"Raw error from Hugging Face:\n{repr(e)}"


if __name__ == "__main__":
    # Start with ONE model you know is served by Inference Providers:
    test_model = "HuggingFaceTB/SmolLM3-3B"
    print("Testing model:", test_model)
    print(simple_chat("Say hello in one sentence.", test_model))