File size: 1,098 Bytes
4335673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import gradio as gr
from openai import OpenAI
from huggingface_hub import InferenceClient


# Initialize the Hugging Face Inference client.
# HF_TOKEN must be set in the environment; os.environ[...] fails fast
# with a KeyError naming the missing variable if it is not.
# (A previous OpenAI-SDK-based router client was removed as dead code;
# see git history if the router endpoint is ever needed again.)
client = InferenceClient(api_key=os.environ["HF_TOKEN"])

# LLM function
def ask_llm(prompt):
    """Send *prompt* to the hosted Llama 3.1 8B Instruct model.

    Returns the assistant's reply text on success, or an ``"Error: ..."``
    string on any failure so the Gradio UI always has something to show.
    """
    try:
        completion = client.chat.completions.create(
            model="meta-llama/Llama-3.1-8B-Instruct",
            messages=[
                {"role": "user", "content": prompt}
            ],
            max_tokens=200,    # cap reply length for the demo
            temperature=0.7,
        )
        # InferenceClient returns message objects, so attribute access
        # (.content) is correct here, not dict-style indexing.
        return completion.choices[0].message.content
    except Exception as e:
        # Broad catch is deliberate: surface any API/network problem in
        # the UI as text instead of crashing the Gradio app.
        return f"Error: {e}"


# Build Gradio UI
# Single-turn interface: one multi-line text box in, one text box out,
# wired directly to ask_llm defined above.
demo = gr.Interface(
    fn=ask_llm,
    inputs=gr.Textbox(lines=3, label="Ask the AI"),
    outputs=gr.Textbox(label="Response"),
    title="HF Inference Client LLM Demo",
    description="Powered by HuggingFace InferenceClient SDK."
)

# Start the web server (blocks until the app is stopped).
demo.launch()