Nullpointer-KK committed on
Commit
6135f2e
·
verified ·
1 Parent(s): 1b56e81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -40
app.py CHANGED
@@ -1,46 +1,60 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- # Available open-source base models
5
  MODEL_CHOICES = {
6
  "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
7
  "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
8
- "LLaMA-2 7B Chat (meta-llama/Llama-2-7b-chat-hf)": "meta-llama/Llama-2-7b-chat-hf",
9
  }
10
 
11
-
12
- def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token: gr.OAuthToken):
 
 
 
 
 
 
13
  """
14
- Get a plain text completion from a Hugging Face-hosted open-source LLM.
15
- Streams output token-by-token.
16
  """
17
- if not hf_token or not hf_token.token:
18
- yield "⚠️ Please log in with your Hugging Face account (needed for gated models like LLaMA-2)."
19
  return
20
 
21
  model_id = MODEL_CHOICES[model_choice]
22
- client = InferenceClient(model=model_id, token=hf_token.token)
23
 
24
- response_text = ""
25
- stream = client.text_generation(
26
- prompt,
27
- max_new_tokens=max_tokens,
28
- temperature=temperature,
29
- top_p=top_p,
30
- stream=True,
31
- repetition_penalty=1.0,
32
- )
33
 
34
- for chunk in stream:
35
- response_text += chunk
36
- yield response_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
 
39
  with gr.Blocks() as demo:
40
- gr.Markdown("## ✍️ Text Completion Demo with Open-Source Base LLMs")
41
  gr.Markdown(
42
- "Pick a model hosted on Hugging Face, enter a prompt, adjust decoding parameters, "
43
- "and watch the model complete your text."
44
  )
45
 
46
  with gr.Row():
@@ -50,33 +64,27 @@ with gr.Blocks() as demo:
50
  placeholder="Type the beginning of your text...",
51
  lines=4,
52
  )
53
- max_tokens = gr.Slider(
54
- minimum=1, maximum=1024, value=100, step=1, label="Max tokens"
55
- )
56
- temperature = gr.Slider(
57
- minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"
58
- )
59
- top_p = gr.Slider(
60
- minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p"
61
- )
62
  model_choice = gr.Dropdown(
63
  choices=list(MODEL_CHOICES.keys()),
64
  value=list(MODEL_CHOICES.keys())[0],
65
  label="Choose a model",
66
  )
67
- submit = gr.Button("Generate Completion")
68
  with gr.Column(scale=3):
69
- output = gr.Textbox(
70
- label="Generated Completion",
71
- lines=15,
72
- )
73
 
74
- # Pass gr.OAuthToken as an input type (no manual instantiation)
75
  submit.click(
76
  fn=complete_text,
77
- inputs=[prompt, max_tokens, temperature, top_p, model_choice, gr.OAuthToken],
78
  outputs=output,
79
  )
80
 
81
  if __name__ == "__main__":
82
  demo.launch()
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
# Pick three open-source models (note: some may require login/accept T&Cs)
# Maps a human-readable dropdown label -> Hugging Face Hub model repo id.
# The label is what the user sees in the gr.Dropdown; the value is passed to
# InferenceClient. Keys double as the Dropdown choices elsewhere in this file.
MODEL_CHOICES = {
    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
    "Zephyr 7B Beta (HuggingFaceH4/zephyr-7b-beta)": "HuggingFaceH4/zephyr-7b-beta",
}
10
 
11
def complete_text(
    prompt: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model_choice: str,
    # Token is injected by Gradio from the LoginButton. Defaulting to None is
    # required so the handler is still callable when the user is NOT logged in;
    # without the default, Gradio raises before our friendly login message below
    # can ever be shown.
    hf_token: gr.OAuthToken | None = None,
):
    """
    Stream a plain text completion from a HF-hosted open model.

    Yields the accumulated completion text after every streamed chunk, so the
    bound Gradio Textbox updates incrementally.

    Parameters:
        prompt: Text to complete; a falsy prompt yields a warning and returns.
        max_tokens: Maximum number of new tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).
        top_p: Nucleus-sampling probability mass (coerced to float).
        model_choice: Key into MODEL_CHOICES selecting the hosted model.
        hf_token: OAuth token from gr.LoginButton, or None when logged out.

    Yields:
        str: the completion accumulated so far, or a user-facing warning/error.
    """
    if not prompt:
        yield "⚠️ Enter a prompt."
        return

    model_id = MODEL_CHOICES[model_choice]

    # Some models require auth; if the user isn't logged in, warn them.
    # getattr handles hf_token being None; `not token_str` (rather than
    # `is None`) additionally rejects an empty token string.
    token_str = getattr(hf_token, "token", None)
    if not token_str:
        yield "🔐 This Space uses the Hugging Face Inference API. Please click **Login** (left sidebar) to authorize."
        return

    client = InferenceClient(model=model_id, token=token_str)

    try:
        generated = ""
        # text_generation returns an iterator of string chunks when stream=True
        for chunk in client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            stream=True,
            repetition_penalty=1.0,
        ):
            generated += chunk
            yield generated
    except Exception as e:
        # Boundary handler: surface inference failures (gated model, rate
        # limit, network) to the UI instead of crashing the event handler.
        yield f"❌ Inference error: {e}"
51
 
52
 
53
  with gr.Blocks() as demo:
54
+ gr.Markdown("## ✍️ Text Completion Demo (Hugging Face Inference API)")
55
  gr.Markdown(
56
+ "Pick a model, enter a prompt, and see a streamed completion. "
57
+ "Some models require you to **Login** (left sidebar) to access gated weights."
58
  )
59
 
60
  with gr.Row():
 
64
  placeholder="Type the beginning of your text...",
65
  lines=4,
66
  )
67
+ max_tokens = gr.Slider(1, 1024, value=100, step=1, label="Max new tokens")
68
+ temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
69
+ top_p = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Top-p")
 
 
 
 
 
 
70
  model_choice = gr.Dropdown(
71
  choices=list(MODEL_CHOICES.keys()),
72
  value=list(MODEL_CHOICES.keys())[0],
73
  label="Choose a model",
74
  )
75
+ submit = gr.Button("Generate")
76
  with gr.Column(scale=3):
77
+ with gr.Sidebar():
78
+ login = gr.LoginButton() # <-- THIS provides the OAuth token
79
+ output = gr.Textbox(label="Generated Completion", lines=15)
 
80
 
81
+ # Wire the login button as the input for the token parameter
82
  submit.click(
83
  fn=complete_text,
84
+ inputs=[prompt, max_tokens, temperature, top_p, model_choice, login],
85
  outputs=output,
86
  )
87
 
88
  if __name__ == "__main__":
89
  demo.launch()
90
+