xtreme86 committed on
Commit
60c03ab
·
1 Parent(s): b7e17f1
Files changed (1) hide show
  1. app.py +79 -32
app.py CHANGED
@@ -1,33 +1,80 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
-
4
- # Load a text generation pipeline with Flan-T5-small model
5
- llm_pipeline = pipeline(task="text-generation", model="google/flan-t5-small")
6
-
7
- # Function to generate text based on user input, with a system instruction
8
- def generate_text(prompt):
9
- system_instruction = "Respond like a helpful AI: " # System instruction
10
- full_prompt = system_instruction + prompt # Combine the instruction with user input
11
-
12
- result = llm_pipeline(
13
- full_prompt,
14
- max_length=50,
15
- num_return_sequences=1,
16
- temperature=0.7,
17
- top_k=50,
18
- top_p=0.9
19
- )
20
- return result[0]['generated_text']
21
-
22
- # Define the Gradio interface
23
- with gr.Blocks() as gradio_app:
24
- gr.Markdown("## Text Generation with Flan-T5-small (Helpful AI Mode)")
25
- prompt = gr.Textbox(label="Enter your prompt", placeholder="Type something here...")
26
- output = gr.Textbox(label="Generated text")
27
- submit_btn = gr.Button("Generate")
28
-
29
- # Set up the button to trigger text generation
30
- submit_btn.click(fn=generate_text, inputs=prompt, outputs=output)
31
-
32
- # Launch the app
33
- gradio_app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
+
4
# For more information on `huggingface_hub` Inference API support, please check the docs:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
+
9
+
10
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Stream a chat reply from the Hugging Face Inference API.

    The conversation sent to the model is the system message, followed by
    every non-empty user/assistant turn in ``history``, followed by the
    current ``message``.

    Args:
        message (str): User's current input.
        history (list[tuple[str, str]]): Previous turns as
            ``(user_text, assistant_text)`` pairs; empty entries are skipped.
        system_message (str): Instructions for the model (e.g., persona details).
        max_tokens (int): Maximum tokens allowed for the response.
        temperature (float): Sampling temperature for randomness in the output.
        top_p (float): Top-p (nucleus) sampling parameter.

    Yields:
        str: The response accumulated so far, once per received stream chunk.
        If the request fails, a single ``"Error: ..."`` string is yielded
        instead of raising.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    try:
        # Use a distinct loop name so the `message` parameter is not shadowed.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Defensive: skip chunks that carry no choices at all.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            # Some stream chunks carry no text (content is None); appending
            # None would raise TypeError and abort the reply mid-stream.
            if token:
                response += token
            yield response
    except Exception as e:
        # UI boundary: surface the failure in the chat window instead of crashing.
        yield f"Error: {e}"
57
+
58
+
59
# For information on how to customize the ChatInterface, peruse the gradio docs:
# https://www.gradio.app/docs/chatinterface
demo = gr.ChatInterface(
    respond,
    # Extra controls shown under the chat box; gradio passes their values to
    # `respond` after (message, history), in this order.
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)


# Only start the web server when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()