d221 committed on
Commit
d7e41fd
·
verified ·
1 Parent(s): 4db1955

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from huggingface_hub import InferenceClient
4
+
5
+
6
# Open-source text-generation models selectable from the UI dropdown.
# All are hosted on the Hugging Face Hub and used via InferenceClient.
AVAILABLE_MODELS = [
    "bigscience/bloom-560m",  # Smaller, faster version of BLOOM
    "bigscience/bloom",  # Original 176B parameter Bloom (heavy)
    "openlm-research/open_llama_3b",  # Smaller 3B LLaMA-like model
    "openlm-research/open_llama_7b",  # 7B LLaMA-like model
    "tiiuae/falcon-7b-instruct",  # Falcon 7B instruct
    "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",  # OpenAssistant 12B
    # Add any other open-source models from HF you like
]
15
+
16
+
17
def chat_with_model(
    user_message,    # The user's message
    history,         # Chat history (handled automatically by Gradio ChatInterface)
    system_message,  # The system message/instructions from the left panel
    user_api_key,    # Optional user-provided HF API key
    model_choice,    # The model chosen from the dropdown
    max_tokens,      # Max new tokens
    temperature,     # Temperature
    top_p            # Top-p
):
    """
    Stream one assistant reply for the latest user message.

    Called by Gradio's ChatInterface on every new message. Uses the
    user-supplied Hugging Face API key when one is given, otherwise
    performs public/anonymous inference.

    Yields:
        str: The cumulative partial response, growing as tokens arrive.
    """

    # Treat a blank or whitespace-only key as "no key" so we fall back to
    # anonymous inference instead of sending an empty token string.
    # (The previous `user_api_key.strip() if user_api_key else None` produced
    # "" for whitespace-only input, which was then passed as a token.)
    final_api_key = (user_api_key or "").strip() or None

    # Initialize InferenceClient with the chosen API key (None -> anonymous)
    client = InferenceClient(token=final_api_key)

    # Build a naive single-turn prompt: system instructions + current message.
    # NOTE(review): `history` is not folded into the prompt, so the model only
    # sees the latest turn — earlier conversation context is dropped.
    prompt = (
        f"{system_message.strip()}\n\n"  # System instructions
        f"User: {user_message}\n"
        "Assistant:"
    )

    # Set generation parameters
    generation_params = dict(
        temperature=temperature,
        max_new_tokens=int(max_tokens),  # sliders hand over floats; API wants int
        top_p=top_p,
        # Some open-source models do better with a smaller
        # repetition_penalty or none at all:
        repetition_penalty=1.0,
    )

    # Perform streaming text generation, skipping special tokens (EOS etc.).
    # details=True is required so each chunk exposes `.token` metadata.
    partial_response = ""
    stream = client.text_generation(
        prompt=prompt,
        model=model_choice,  # The user's chosen model
        stream=True,
        details=True,
        **generation_params
    )
    for chunk in stream:
        if chunk.token.special:
            continue
        partial_response += chunk.token.text
        yield partial_response
72
+
73
+
74
# --- UI layout: left column holds configuration, right column the chat. ---
with gr.Blocks(theme="soft") as demo:
    # Title banner rendered as centered HTML inside Markdown
    gr.Markdown(
        """
        <h1 style="text-align:center; margin-bottom: 5px;">
        <b>Open-Source GPT Chatbot</b>
        </h1>
        """,
        elem_id="title"
    )

    with gr.Row():
        # Left Column: system msg, HF API key, model dropdown, sliders, etc.
        with gr.Column(scale=1, min_width=270):
            # Instructions prepended to every prompt sent to the model
            system_message = gr.Textbox(
                label="System Message",
                value="You are a helpful open-source AI assistant."
            )

            # Optional token; blank means anonymous/public inference
            user_api_key = gr.Textbox(
                label="Hugging Face API Key (optional)",
                type="password",
                placeholder="Leave blank for public/anonymous usage"
            )

            # Which Hub model to query (see AVAILABLE_MODELS)
            model_choice = gr.Dropdown(
                label="Select Open-Source Model",
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],  # Default to first in list
            )

            # Generation hyperparameters forwarded to chat_with_model
            max_tokens = gr.Slider(
                minimum=1,
                maximum=2000,
                step=1,
                value=512,
                label="Max new tokens"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.01,
                label="Top-p (nucleus sampling)"
            )

        # Right Column: The chat interface
        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=chat_with_model,
                # Additional inputs needed by chat_with_model:
                additional_inputs=[system_message, user_api_key, model_choice,
                                   max_tokens, temperature, top_p],
                type="messages",  # Use newer messages format
                height=550,
                title="Open-Source Chatbot"
            )

# Launch the Gradio app (top-level launch is the convention for HF Spaces)
demo.launch()