Spaces:

TobDeBer
/

AdvancedZero

Paused

File size: 7,372 Bytes

21b8ce0
13e498a
17dc147
53a8258
 
 
6e7a7b9
615a20b
6e7a7b9
 
 
d8a3c53
17dc147
53a8258
40afde6
1d9807e
a15f664
 
17dc147
 
 
0e76b02
 
 
670bd04
 
1d9807e
 
0e76b02
1d9807e
670bd04
0e76b02
1d9807e
53a8258
1d9807e
 
2d1ec07
d8a3c53
 
1d9807e
 
d8a3c53
 
 
 
0824852
 
d8a3c53
a15f664
 
 
17dc147
1d9807e
17dc147
53a8258
17dc147
 
 
 
53a8258
1d9807e
 
 
 
 
 
 
 
 
 
53a8258
a15f664
5c89384
cf6a52f
 
11987d3
0e76b02
cf6a52f
8e6bf26
17dc147
cf6a52f
5c89384
 
 
e9aaf81
5c89384
cf6a52f
ac70b49
 
 
1d9807e
 
17dc147
0e76b02
 
 
 
 
 
 
17dc147
559c9c0
8e6bf26
b9838b1
559c9c0
606c0ce
53a8258
1d9807e
53a8258
1d9807e
 
17dc147
 
1d9807e
2bfa2c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8a3c53
 
4b81451
17dc147
d8a3c53
2bfa2c0
 
 
 
 
 
 
d8a3c53
6ccc337
15a7813
5eb0b07
5c900a6
ed82b9d
9a1c399
736d872
c940f4e
2110ec0
7c944d3
c940f4e
 
905a689
f97343d
53a8258
d8a3c53
 
 
1d9807e
 
 
53a8258

import spaces
import json
import os
import glob
import subprocess

from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download, list_repo_files
from model_loader import MODEL_DROPDOWN_CHOICES, MODEL_FILE_MAPPING

# --- Global configuration and variables ---
# Loaded llama.cpp model instance; stays None until respond() loads a model.
llm = None
# File path of the model currently held in `llm`; respond() compares it with
# the requested model's path to decide whether a reload is needed.
llm_model = None

# Custom stylesheet passed to gr.ChatInterface(css=...) further down.
css = """.bubble-wrap {    padding-top: calc(var(--spacing-xl) * 3) !important;}.message-row {    justify-content: space-evenly !important;    width: 100% !important;    max-width: 100% !important;    margin: calc(var(--spacing-xl)) 0 !important;    padding: 0 calc(var(--spacing-xl) * 3) !important;}.flex-wrap.user {    border-bottom-right-radius: var(--radius-lg) !important;}.flex-wrap.bot {    border-bottom-left-radius: var(--radius-lg) !important;}.message.user{    padding: 10px;}.message.bot{    text-align: right;    width: 100%;    padding: 10px;    border-radius: 10px;}.message-bubble-border {    border-radius: 6px !important;}.message-buttons {    justify-content: flex-end !important;}.message-buttons-left {    align-self: end !important;}.message-buttons-bot, .message-buttons-user {    right: 10px !important;    left: auto !important;    bottom: 2px !important;}.dark.message-bubble-border {    border-color: #343140 !important;}.dark.user {    background: #1e1c26 !important;}.dark.assistant.dark, .dark.pending.dark {    background: #16141c !important;}"""

def get_messages_formatter_type(model_name):
    """Pick the chat-prompt formatter matching a model's display name.

    The lookup is a case-sensitive substring test on ``model_name``:

    * contains ``"Llama"``   -> ``MessagesFormatterType.LLAMA_3``
    * contains ``"Mistral"`` -> ``MessagesFormatterType.MISTRAL``
    * anything else          -> ``MessagesFormatterType.CHATML``

    Names containing ``"GLM"`` or ``"Granite"`` are known ChatML users; for
    every other unmatched name a notice is printed before falling back to
    ChatML.
    """
    if "Llama" in model_name:
        return MessagesFormatterType.LLAMA_3
    if "Mistral" in model_name:
        return MessagesFormatterType.MISTRAL

    # Everything from here on resolves to ChatML; only unknown families
    # get the "falling back" notice.
    known_chatml_families = ("GLM", "Granite")
    if not any(family in model_name for family in known_chatml_families):
        print("Formatter type not found, trying default")
    return MessagesFormatterType.CHATML

# ----------------------------------------------------------------------
## Main Response Function for ChatInterface
# ----------------------------------------------------------------------

@spaces.GPU(duration=90)
def respond(
    message,
    history: list[dict[str, str]],
    selected_model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat reply for ``message`` from the selected GGUF model.

    This function is a generator: it yields the accumulated reply text after
    every streamed chunk. Because of that, error conditions are reported by
    *yielding* the error text and then returning; a value handed to a bare
    ``return`` inside a generator is never produced as output, so the caller
    would otherwise see an empty response.

    Args:
        message: The new user message.
        history: Previous turns as dicts with ``'role'`` and ``'content'``
            keys. Any role other than ``'user'`` is treated as assistant.
        selected_model_name: Key into ``MODEL_FILE_MAPPING`` identifying the
            model to run.
        system_message: System prompt given to the agent.
        max_tokens: Sampling setting forwarded as ``max_tokens``.
        temperature: Sampling setting forwarded as ``temperature``.
        top_p: Sampling setting forwarded as ``top_p``.
        top_k: Sampling setting forwarded as ``top_k``.
        repeat_penalty: Sampling setting forwarded as ``repeat_penalty``.

    Yields:
        The reply text generated so far (growing with each chunk), or a
        single error string if the model file is unknown or fails to load.
    """
    global llm
    global llm_model

    model_file_path = MODEL_FILE_MAPPING.get(selected_model_name)

    if not model_file_path:
        # Must be yielded, not returned: see the docstring.
        yield f"Error: Model file for '{selected_model_name}' not found. Has the download completed?"
        return

    chat_template = get_messages_formatter_type(selected_model_name)

    # (Re)load only when nothing is loaded yet or a different model was picked.
    if llm is None or llm_model != model_file_path:
        print(f"Loading new model: {model_file_path}")
        try:
            llm = Llama(
                model_path=model_file_path,
                flash_attn=True,
                n_gpu_layers=81,
                n_batch=1024,
                n_ctx=8192,
            )
        except Exception as e:
            # Top-level UI boundary: surface any load failure to the user
            # instead of crashing the request.
            yield f"Error during loading of Llama model '{selected_model_name}' ({model_file_path}): {e}"
            return
        # Record the path only after a successful load so a failed attempt
        # is retried on the next request.
        llm_model = model_file_path

    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Convert the UI's message dicts into the agent's chat-history format.
    messages = BasicChatHistory()
    for entry in history:
        role = Roles.user if entry.get('role') == 'user' else Roles.assistant
        messages.add_message({'role': role, 'content': entry.get('content', '')})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the running total so the UI can replace the displayed text
    # with the full reply-so-far on each update.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs

# HTML snippet passed as the `placeholder` of the gr.Chatbot created below.
PLACEHOLDER = """<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">    <div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">        <h2 style="text-align: left; font-size: 1.5rem; font-weight: 700; margin-bottom: 0.5rem;">llama.cpp based quantized gguf inference</h2> <p style="text-align: left; font-size: 16px; line-height: 1.5; margin-bottom: 15px;">This space hosts an Advanced model.</p>  </div></div>"""

# --- Gradio input widgets (choices come from model_loader) ---
# The first available model is preselected; None signals that no model
# is configured at all.
if MODEL_DROPDOWN_CHOICES:
    default_model = MODEL_DROPDOWN_CHOICES[0]
else:
    default_model = None

model_dropdown = gr.Dropdown(
    choices=MODEL_DROPDOWN_CHOICES,
    value=default_model,
    label="Model",
)
system_textbox = gr.Textbox(
    value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.",
    label="System message",
)
max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=4096,
    value=2048,
    step=1,
    label="Max tokens",
)
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p",
)
top_k_slider = gr.Slider(
    minimum=0,
    maximum=100,
    value=40,
    step=1,
    label="Top-k",
)
repeat_penalty_slider = gr.Slider(
    minimum=0.0,
    maximum=2.0,
    value=1.1,
    step=0.1,
    label="Repetition penalty",
)

# Chat UI definition. `respond` is the handler; the widgets in
# `additional_inputs` are listed in the same order as respond()'s parameters
# after (message, history). Both the interface and the chatbot use the
# "messages" history format, matching the role/content dicts respond() reads.
demo = gr.ChatInterface(
    respond,
    type="messages",
    chatbot=gr.Chatbot(placeholder=PLACEHOLDER, height=450, type="messages", label=False),
    additional_inputs=[
        model_dropdown,
        system_textbox,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        top_k_slider,
        repeat_penalty_slider
    ],
    # Soft theme in violet/gray with the "Exo" Google font, plus dark-mode
    # colour overrides applied through .set(...).
    theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
        body_background_fill_dark="#16141c",
        block_background_fill_dark="#16141c",
        block_border_width="1px",
        block_title_background_fill_dark="#1e1c26",
        input_background_fill_dark="#292733",
        button_secondary_background_fill_dark="#24212b",
        border_color_accent_dark="#343140",
        border_color_primary_dark="#343140",
        background_fill_secondary_dark="#16141c",
        color_accent_soft_dark="transparent",
        code_background_fill_dark="#292733",
    ),
    css=css,
    description="Advanced model",
)

if __name__ == "__main__":
    # With no model configured there is nothing to serve, so only report
    # the problem; otherwise start the Gradio app.
    if not default_model:
        print("Could not load any models or configure. App will not start.")
    else:
        demo.launch()