Spaces:
Paused
Paused
Update app.py (#2)
Browse files- Update app.py (165564616650f1f6ed61dc26d3277e9a4e965acf)
app.py
CHANGED
|
@@ -15,6 +15,9 @@ llm_model = None
|
|
| 15 |
# TODO: have a list of repo/filename pairs
|
| 16 |
# TODO: Derive the Dropdown list and default from it. First entry is default.
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
hf_hub_download(
|
| 19 |
repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
|
| 20 |
filename = "Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf",
|
|
@@ -40,6 +43,8 @@ hf_hub_download(
|
|
| 40 |
filename="UD-Q3_K_XL/GLM-4.5-Air-UD-Q3_K_XL-00002-of-00002.gguf",
|
| 41 |
local_dir = "./models"
|
| 42 |
)
|
|
|
|
|
|
|
| 43 |
css = """
|
| 44 |
.bubble-wrap {
|
| 45 |
padding-top: calc(var(--spacing-xl) * 3) !important;
|
|
@@ -91,6 +96,7 @@ css = """
|
|
| 91 |
}
|
| 92 |
"""
|
| 93 |
|
|
|
|
| 94 |
def get_messages_formatter_type(model_name):
|
| 95 |
print(f"getting type for model: {model_name}")
|
| 96 |
if "Llama" in model_name:
|
|
@@ -101,9 +107,9 @@ def get_messages_formatter_type(model_name):
|
|
| 101 |
return MessagesFormatterType.CHATML
|
| 102 |
else:
|
| 103 |
print("formatter type not found, trying default")
|
| 104 |
-
# raise ValueError(f"Unsupported model: {model_name}")
|
| 105 |
return MessagesFormatterType.CHATML
|
| 106 |
|
|
|
|
| 107 |
@spaces.GPU(duration=45)
|
| 108 |
def respond(
|
| 109 |
message,
|
|
@@ -121,6 +127,7 @@ def respond(
|
|
| 121 |
|
| 122 |
chat_template = get_messages_formatter_type(model)
|
| 123 |
|
|
|
|
| 124 |
if llm is None or llm_model != model:
|
| 125 |
llm = Llama(
|
| 126 |
model_path=f"models/{model}",
|
|
@@ -140,6 +147,7 @@ def respond(
|
|
| 140 |
debug_output=True
|
| 141 |
)
|
| 142 |
|
|
|
|
| 143 |
settings = provider.get_provider_default_settings()
|
| 144 |
settings.temperature = temperature
|
| 145 |
settings.top_k = top_k
|
|
@@ -148,6 +156,7 @@ def respond(
|
|
| 148 |
settings.repeat_penalty = repeat_penalty
|
| 149 |
settings.stream = True
|
| 150 |
|
|
|
|
| 151 |
messages = BasicChatHistory()
|
| 152 |
|
| 153 |
for msn in history:
|
|
@@ -162,6 +171,7 @@ def respond(
|
|
| 162 |
messages.add_message(user)
|
| 163 |
messages.add_message(assistant)
|
| 164 |
|
|
|
|
| 165 |
stream = agent.get_chat_response(
|
| 166 |
message,
|
| 167 |
llm_sampling_settings=settings,
|
|
@@ -175,6 +185,7 @@ def respond(
|
|
| 175 |
outputs += output
|
| 176 |
yield outputs
|
| 177 |
|
|
|
|
| 178 |
PLACEHOLDER = """
|
| 179 |
<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
|
| 180 |
<div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">
|
|
@@ -200,8 +211,9 @@ PLACEHOLDER = """
|
|
| 200 |
</div>
|
| 201 |
"""
|
| 202 |
|
| 203 |
-
#
|
| 204 |
-
|
|
|
|
| 205 |
model_dropdown = gr.Dropdown([
|
| 206 |
'granite-4.0-h-tiny-UD-Q3_K_XL.gguf',
|
| 207 |
'Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf',
|
|
@@ -212,9 +224,16 @@ model_dropdown = gr.Dropdown([
|
|
| 212 |
label="Model"
|
| 213 |
)
|
| 214 |
|
|
|
|
| 215 |
system_textbox = gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.", label="System message")
|
|
|
|
|
|
|
| 216 |
max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens")
|
|
|
|
|
|
|
| 217 |
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
|
|
|
|
|
|
|
| 218 |
top_p_slider = gr.Slider(
|
| 219 |
minimum=0.1,
|
| 220 |
maximum=1.0,
|
|
@@ -222,6 +241,8 @@ top_p_slider = gr.Slider(
|
|
| 222 |
step=0.05,
|
| 223 |
label="Top-p",
|
| 224 |
)
|
|
|
|
|
|
|
| 225 |
top_k_slider = gr.Slider(
|
| 226 |
minimum=0,
|
| 227 |
maximum=100,
|
|
@@ -229,6 +250,8 @@ top_k_slider = gr.Slider(
|
|
| 229 |
step=1,
|
| 230 |
label="Top-k",
|
| 231 |
)
|
|
|
|
|
|
|
| 232 |
repeat_penalty_slider = gr.Slider(
|
| 233 |
minimum=0.0,
|
| 234 |
maximum=2.0,
|
|
@@ -238,9 +261,10 @@ repeat_penalty_slider = gr.Slider(
|
|
| 238 |
)
|
| 239 |
|
| 240 |
|
|
|
|
| 241 |
demo = gr.ChatInterface(
|
| 242 |
respond,
|
| 243 |
-
#
|
| 244 |
additional_inputs=[
|
| 245 |
model_dropdown,
|
| 246 |
system_textbox,
|
|
@@ -250,6 +274,7 @@ demo = gr.ChatInterface(
|
|
| 250 |
top_k_slider,
|
| 251 |
repeat_penalty_slider
|
| 252 |
],
|
|
|
|
| 253 |
theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
|
| 254 |
body_background_fill_dark="#16141c",
|
| 255 |
block_background_fill_dark="#16141c",
|
|
@@ -264,17 +289,17 @@ demo = gr.ChatInterface(
|
|
| 264 |
code_background_fill_dark="#292733",
|
| 265 |
),
|
| 266 |
css=css,
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
| 271 |
description="Llama-cpp-agent: Chat multi llm selection",
|
| 272 |
-
#
|
| 273 |
-
# über die 'placeholder' Eigenschaft des ChatInterfaces (wie hier) gesetzt,
|
| 274 |
-
# nicht mehr durch ein gr.Chatbot Objekt.
|
| 275 |
placeholder=PLACEHOLDER,
|
| 276 |
-
# Der Parameter chatbot wird nicht mehr verwendet
|
| 277 |
)
|
| 278 |
|
|
|
|
| 279 |
if __name__ == "__main__":
|
|
|
|
| 280 |
demo.launch()
|
|
|
|
| 15 |
# TODO: have a list of repo/filename pairs
|
| 16 |
# TODO: Derive the Dropdown list and default from it. First entry is default.
|
| 17 |
|
| 18 |
+
# --- Modell-Downloads ---
|
| 19 |
+
# Note: on first run these models are downloaded and stored in the ./models folder.
|
| 20 |
+
|
| 21 |
hf_hub_download(
|
| 22 |
repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
|
| 23 |
filename = "Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf",
|
|
|
|
| 43 |
filename="UD-Q3_K_XL/GLM-4.5-Air-UD-Q3_K_XL-00002-of-00002.gguf",
|
| 44 |
local_dir = "./models"
|
| 45 |
)
|
| 46 |
+
|
| 47 |
+
# --- CSS Styling ---
|
| 48 |
css = """
|
| 49 |
.bubble-wrap {
|
| 50 |
padding-top: calc(var(--spacing-xl) * 3) !important;
|
|
|
|
| 96 |
}
|
| 97 |
"""
|
| 98 |
|
| 99 |
+
# --- Hilfsfunktion für den Message Formatter Typ ---
|
| 100 |
def get_messages_formatter_type(model_name):
|
| 101 |
print(f"getting type for model: {model_name}")
|
| 102 |
if "Llama" in model_name:
|
|
|
|
| 107 |
return MessagesFormatterType.CHATML
|
| 108 |
else:
|
| 109 |
print("formatter type not found, trying default")
|
|
|
|
| 110 |
return MessagesFormatterType.CHATML
|
| 111 |
|
| 112 |
+
# --- Haupt-Antwortfunktion für ChatInterface ---
|
| 113 |
@spaces.GPU(duration=45)
|
| 114 |
def respond(
|
| 115 |
message,
|
|
|
|
| 127 |
|
| 128 |
chat_template = get_messages_formatter_type(model)
|
| 129 |
|
| 130 |
+
# Llama-Modell initialisieren oder wechseln, falls nötig
|
| 131 |
if llm is None or llm_model != model:
|
| 132 |
llm = Llama(
|
| 133 |
model_path=f"models/{model}",
|
|
|
|
| 147 |
debug_output=True
|
| 148 |
)
|
| 149 |
|
| 150 |
+
# Sampling-Einstellungen setzen
|
| 151 |
settings = provider.get_provider_default_settings()
|
| 152 |
settings.temperature = temperature
|
| 153 |
settings.top_k = top_k
|
|
|
|
| 156 |
settings.repeat_penalty = repeat_penalty
|
| 157 |
settings.stream = True
|
| 158 |
|
| 159 |
+
# Chat-Verlauf vorbereiten
|
| 160 |
messages = BasicChatHistory()
|
| 161 |
|
| 162 |
for msn in history:
|
|
|
|
| 171 |
messages.add_message(user)
|
| 172 |
messages.add_message(assistant)
|
| 173 |
|
| 174 |
+
# Antwort streamen
|
| 175 |
stream = agent.get_chat_response(
|
| 176 |
message,
|
| 177 |
llm_sampling_settings=settings,
|
|
|
|
| 185 |
outputs += output
|
| 186 |
yield outputs
|
| 187 |
|
| 188 |
+
# --- HTML Platzhalter für den Chatbot ---
|
| 189 |
PLACEHOLDER = """
|
| 190 |
<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
|
| 191 |
<div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">
|
|
|
|
| 211 |
</div>
|
| 212 |
"""
|
| 213 |
|
| 214 |
+
# --- Zusätzliche Eingabekomponenten erstellen (Gradio V4+) ---
|
| 215 |
+
|
| 216 |
+
# Komponente 1: Model Dropdown
|
| 217 |
model_dropdown = gr.Dropdown([
|
| 218 |
'granite-4.0-h-tiny-UD-Q3_K_XL.gguf',
|
| 219 |
'Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf',
|
|
|
|
| 224 |
label="Model"
|
| 225 |
)
|
| 226 |
|
| 227 |
+
# Komponente 2: System Message
|
| 228 |
system_textbox = gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.", label="System message")
|
| 229 |
+
|
| 230 |
+
# Komponente 3: Max Tokens
|
| 231 |
max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens")
|
| 232 |
+
|
| 233 |
+
# Komponente 4: Temperature
|
| 234 |
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
|
| 235 |
+
|
| 236 |
+
# Komponente 5: Top-p
|
| 237 |
top_p_slider = gr.Slider(
|
| 238 |
minimum=0.1,
|
| 239 |
maximum=1.0,
|
|
|
|
| 241 |
step=0.05,
|
| 242 |
label="Top-p",
|
| 243 |
)
|
| 244 |
+
|
| 245 |
+
# Komponente 6: Top-k
|
| 246 |
top_k_slider = gr.Slider(
|
| 247 |
minimum=0,
|
| 248 |
maximum=100,
|
|
|
|
| 250 |
step=1,
|
| 251 |
label="Top-k",
|
| 252 |
)
|
| 253 |
+
|
| 254 |
+
# Komponente 7: Repetition penalty
|
| 255 |
repeat_penalty_slider = gr.Slider(
|
| 256 |
minimum=0.0,
|
| 257 |
maximum=2.0,
|
|
|
|
| 261 |
)
|
| 262 |
|
| 263 |
|
| 264 |
+
# --- Gradio Chat Interface Definition ---
|
| 265 |
demo = gr.ChatInterface(
|
| 266 |
respond,
|
| 267 |
+
# Zusätzliche Komponenten als Liste übergeben
|
| 268 |
additional_inputs=[
|
| 269 |
model_dropdown,
|
| 270 |
system_textbox,
|
|
|
|
| 274 |
top_k_slider,
|
| 275 |
repeat_penalty_slider
|
| 276 |
],
|
| 277 |
+
# Theme und CSS
|
| 278 |
theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
|
| 279 |
body_background_fill_dark="#16141c",
|
| 280 |
block_background_fill_dark="#16141c",
|
|
|
|
| 289 |
code_background_fill_dark="#292733",
|
| 290 |
),
|
| 291 |
css=css,
|
| 292 |
+
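# NOTE(review): the button keyword arguments below were renamed from *_btn to *_label. The gr.ChatInterface reference documents submit_btn/stop_btn (plus retry_btn/undo_btn/clear_btn in Gradio 4.x) but no *_label parameters - verify against the installed Gradio version, since an unknown keyword argument raises TypeError.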
|
| 293 |
+
retry_label="Retry",
|
| 294 |
+
undo_label="Undo",
|
| 295 |
+
clear_label="Clear",
|
| 296 |
+
submit_label="Send",
|
| 297 |
description="Llama-cpp-agent: Chat multi llm selection",
|
| 298 |
+
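# Placeholder HTML shown in the empty chatbot. NOTE(review): `placeholder` is documented on gr.Chatbot rather than gr.ChatInterface - confirm this keyword is accepted here, otherwise pass chatbot=gr.Chatbot(placeholder=PLACEHOLDER).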
|
|
|
|
|
|
|
| 299 |
placeholder=PLACEHOLDER,
|
|
|
|
| 300 |
)
|
| 301 |
|
| 302 |
+
# --- App starten ---
|
| 303 |
if __name__ == "__main__":
|
| 304 |
+
# gr.ChatInterface erfordert Gradio v4+
|
| 305 |
demo.launch()
|