Nullpointer-KK committed
Commit 3af1594 · verified · 1 Parent(s): 63b7a2a

Update app.py

Files changed (1)
  app.py +64 -40
app.py CHANGED
@@ -1,58 +1,82 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
+# Available open-source base models (completion style)
+MODEL_CHOICES = {
+    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
+    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
+    "LLaMA-2 7B Chat (meta-llama/Llama-2-7b-chat-hf)": "meta-llama/Llama-2-7b-chat-hf",
+}
 
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
+
+def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token: gr.OAuthToken):
     """
-    Chat with a base LLM hosted on Hugging Face Hub.
-    Uses streaming to show tokens as they arrive.
+    Get a plain text completion from a Hugging Face-hosted open-source LLM.
+    Streams output token-by-token.
     """
-    # Replace with a model you have access to, e.g. "meta-llama/Llama-2-7b-chat-hf"
-    client = InferenceClient(model="openai/gpt-oss-20b", token=hf_token.token)
+    if not hf_token or not hf_token.token:
+        yield "⚠️ Please log in with your Hugging Face account (for gated models like LLaMA-2)."
+        return
 
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
+    model_id = MODEL_CHOICES[model_choice]
+    client = InferenceClient(model=model_id, token=hf_token.token)
 
-    response = ""
-    for chunk in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
+    response_text = ""
+    stream = client.text_generation(
+        prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
         stream=True,
-    ):
-        if len(chunk.choices) and chunk.choices[0].delta.content:
-            token = chunk.choices[0].delta.content
-            response += token
-            yield response
-
+        repetition_penalty=1.0,
+    )
 
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-    ],
-)
+    for event in stream:
+        # Each event is a string chunk
+        response_text += event
+        yield response_text
 
 
 with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
+    gr.Markdown("## ✍️ Text Completion Demo with Open-Source Base LLMs")
+    gr.Markdown(
+        "Pick a model hosted on Hugging Face, enter a prompt, adjust decoding parameters, "
+        "and watch the model complete your text."
+    )
+
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Type the beginning of your text...",
+                lines=4,
+            )
+            max_tokens = gr.Slider(
+                minimum=1, maximum=1024, value=100, step=1, label="Max tokens"
+            )
+            temperature = gr.Slider(
+                minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"
+            )
+            top_p = gr.Slider(
+                minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p"
+            )
+            model_choice = gr.Dropdown(
+                choices=list(MODEL_CHOICES.keys()),
+                value=list(MODEL_CHOICES.keys())[0],
+                label="Choose a model",
+            )
+            submit = gr.Button("Generate Completion")
+        with gr.Column(scale=3):
+            output = gr.Textbox(
+                label="Generated Completion",
+                lines=15,
+            )
 
+    submit.click(
+        fn=complete_text,
+        inputs=[prompt, max_tokens, temperature, top_p, model_choice],  # hf_token is injected via the gr.OAuthToken type hint, not passed as a component
+        outputs=output,
+    )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
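
The core pattern behind the new handler may be easier to see outside the diff: Gradio fills any event-handler parameter annotated with gr.OAuthToken (or gr.OAuthToken | None) from the logged-in session, which is why the token never appears among the click inputs, and InferenceClient.text_generation(..., stream=True) yields plain string chunks that can be concatenated as they arrive. The sketch below is illustrative rather than part of the commit: the model id, the component labels, and the assumption that the Space enables OAuth (hf_oauth: true in its README metadata, with a gr.LoginButton as the sign-in entry point) are stand-ins.

# Minimal sketch (not from the commit) of OAuth-token injection plus streaming.
# Assumes the Space sets hf_oauth: true in its README metadata so login works.
import gradio as gr
from huggingface_hub import InferenceClient


def complete(prompt: str, hf_token: gr.OAuthToken | None):
    # Gradio injects hf_token from the session because of the annotation;
    # note that it is absent from the `inputs` list below.
    if hf_token is None:
        yield "Please log in first."
        return
    client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.2", token=hf_token.token)
    text = ""
    # With stream=True (and default details=False), text_generation yields
    # plain string chunks, so they concatenate directly.
    for chunk in client.text_generation(prompt, max_new_tokens=64, stream=True):
        text += chunk
        yield text


with gr.Blocks() as sketch:
    gr.LoginButton()  # entry point that creates the OAuth session
    box = gr.Textbox(label="Prompt")
    out = gr.Textbox(label="Completion")
    box.submit(complete, inputs=[box], outputs=out)

if __name__ == "__main__":
    sketch.launch()

Because the token arrives through the annotation, only user-visible components belong in inputs; listing anything else there would be treated as a missing component.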