"""GeoChat: a Gradio chat UI for NVIDIA-hosted Llama 3 via the OpenAI client."""
import gradio as gr
from openai import OpenAI, OpenAIError
# Global message history: list of {"role": ..., "content": ...} dicts in
# OpenAI chat format (system prompt + alternating user/assistant turns).
# NOTE(review): module-level mutable state is shared by every browser
# session of this Gradio app — confirm single-user deployment is intended.
history = []
# Main chatbot function
# Now accepts api_key provided by the user
def chatbot(user_input, api_key, temperature, top_p, max_tokens):
    """Handle one chat turn: record the user message, stream the model's
    reply, and return the refreshed display history.

    Args:
        user_input: Text typed by the user; blank/whitespace input is a no-op.
        api_key: NVIDIA API key entered by the user at runtime.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling cutoff forwarded to the model.
        max_tokens: Maximum number of tokens the model may generate.

    Returns:
        A pair ``(display_messages, textbox_value)``: the user/assistant
        turns to show in the Chatbot component (system prompt filtered out),
        and the new textbox content ('' on success so the input is cleared).
    """
    global history

    user_input = (user_input or "").strip()
    if not user_input:
        # Nothing to send: just re-render the current conversation.
        # (Filtered view, so the system prompt never leaks into the UI.)
        return _display_history(), ''

    if not api_key:
        # Fail fast with a readable hint instead of an opaque auth error
        # from the API. Nothing is persisted, so the turn can be retried;
        # the typed text is kept in the textbox.
        warning = {
            "role": "assistant",
            "content": "⚠️ Please enter your NVIDIA API key before sending a message."
        }
        return _display_history() + [{"role": "user", "content": user_input}, warning], user_input

    # Seed the system prompt exactly once, before the first user turn.
    if not history:
        history.append({
            "role": "system",
            "content": "You are a helpful assistant that explains complex topics clearly."
        })
    history.append({"role": "user", "content": user_input})

    # The key can change between calls (it comes from a textbox), so the
    # client is constructed per request with the user-provided key.
    client = OpenAI(
        base_url="https://integrate.api.nvidia.com/v1",
        api_key=api_key
    )

    try:
        # Stream the completion and accumulate the token deltas.
        response_stream = client.chat.completions.create(
            model="meta/llama3-8b-instruct",
            messages=history,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            stream=True
        )
        assistant_reply = ""
        for chunk in response_stream:
            delta = chunk.choices[0].delta
            if delta and delta.content:
                assistant_reply += delta.content
    except OpenAIError as e:
        # Show the error in the UI but do NOT store it in `history`:
        # a persisted error blob would be replayed to the model as
        # conversation context on every subsequent turn.
        error_reply = f"⚠️ API Error: {e.__class__.__name__}: {e}"
        return _display_history() + [{"role": "assistant", "content": error_reply}], ''

    history.append({"role": "assistant", "content": assistant_reply})
    return _display_history(), ''


def _display_history():
    """Project the stored history onto the user/assistant turns shown in
    the Chatbot component (the system prompt is never displayed)."""
    return [
        {"role": msg["role"], "content": msg["content"]}
        for msg in history
        if msg["role"] in ("user", "assistant")
    ]
# Clear conversation history
def clear_history():
    """Drop every stored message and reset the UI.

    Returns:
        A pair of (empty chat display, empty textbox value) matching the
        [chatbot_ui, txt] outputs this handler is wired to.
    """
    global history
    history = []
    empty_chat, empty_text = [], ''
    return empty_chat, empty_text
# Custom CSS for a cleaner, centered layout: centers the header block,
# flattens the chat panel (no border, light background), and hides
# Gradio's default footer. Injected via gr.Blocks(css=...).
custom_css = r"""
#header {
text-align: center;
margin-bottom: 1rem;
}
#title {
font-size: 2rem;
margin: 0;
}
#chatbot {
border: none;
background-color: #f9f9f9;
}
footer {
visibility: hidden;
}
"""
# Top-level UI definition: builds the Blocks layout and wires the events.
with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    # Centered page header (styled by the #header / #title CSS rules).
    with gr.Row(elem_id="header"):
        gr.Markdown("<h1 id='title'>🌐 GeoChat</h1>")
    # Main layout: wide chat column (left) + key/settings column (right).
    with gr.Row():
        with gr.Column(scale=4, min_width=600):
            # type="messages" matches the list-of-dict history produced
            # by the chatbot() handler.
            chatbot_ui = gr.Chatbot(elem_id="chatbot", label="Assistant", height=500, type="messages")
            with gr.Row():
                txt = gr.Textbox(
                    placeholder="Type your question and press Send...",
                    show_label=False,
                    lines=2
                )
                btn = gr.Button("Send")
            with gr.Row():
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=1, min_width=200):
            gr.Markdown(
                """
                ### 🔑 API Key
                Get your NVIDIA API Key at [NVIDIA NGC API Keys](https://org.ngc.nvidia.com/setup/api-keys)
                """
            )
            # Password-type textbox so the key is masked in the browser.
            api_key_input = gr.Textbox(
                label="NVIDIA API Key",
                placeholder="Enter your key here",
                type="password",
                show_label=True
            )
            gr.Markdown("### ⚙️ Settings")
            # Sampling controls forwarded verbatim to the completion call.
            temp_slider = gr.Slider(0, 1, value=0.6, label="Temperature")
            top_p_slider = gr.Slider(0, 1, value=0.95, label="Top-p")
            max_tokens_slider = gr.Slider(64, 2048, value=1024, step=64, label="Max Tokens")
            gr.Markdown(
                """
                **Temperature:** controls the randomness of the responses; lower values make output more deterministic.
                **Top-p:** sets the cumulative probability for nucleus sampling; lower values focus on fewer tokens.
                **Max Tokens:** maximum number of tokens the model can generate in the response.
                """
            )
    # Event wiring: both the Send button and Enter in the textbox invoke
    # chatbot(); Clear resets the stored history plus both components.
    btn.click(
        fn=chatbot,
        inputs=[txt, api_key_input, temp_slider, top_p_slider, max_tokens_slider],
        outputs=[chatbot_ui, txt]
    )
    txt.submit(
        fn=chatbot,
        inputs=[txt, api_key_input, temp_slider, top_p_slider, max_tokens_slider],
        outputs=[chatbot_ui, txt]
    )
    clear_btn.click(fn=clear_history, outputs=[chatbot_ui, txt])
# Launch the app locally when run as a script.
# NOTE(review): the old comment claimed the browser opens automatically,
# but launch() is called without inbrowser=True — confirm intended behavior.
if __name__ == "__main__":
    demo.launch()