| | import spaces |
| | import json |
| | import subprocess |
| | from llama_cpp import Llama |
| | from llama_cpp_agent import LlamaCppAgent |
| | from llama_cpp_agent import MessagesFormatterType |
| | from llama_cpp_agent.providers import LlamaCppPythonProvider |
| | from llama_cpp_agent.chat_history import BasicChatHistory |
| | from llama_cpp_agent.chat_history.messages import Roles |
| | import gradio as gr |
| | from huggingface_hub import hf_hub_download |
| | from ui import css, PLACEHOLDER |
| |
|
# Lazily-initialized llama.cpp handle and the filename of the model it holds.
llm = None
llm_model = None

# Pre-fetch every selectable GGUF checkpoint so switching models in the UI
# never has to wait on a download.
_MODEL_FILES = [
    ("baconnier/Napoleon_24B_V0.2-Q8_0-GGUF", "napoleon_24b_v0.2-q8_0.gguf"),
    ("baconnier/Napoleon_24B_V0.1-Q8_0-GGUF", "napoleon_24b_v0.1-q8_0.gguf"),
    ("baconnier/Napoleon_24B_V0.0-GGUF", "Napoleon_24B_V0.0.Q8_0.gguf"),
]
for _repo_id, _filename in _MODEL_FILES:
    hf_hub_download(repo_id=_repo_id, filename=_filename, local_dir="./models")
| |
|
| |
|
@spaces.GPU(duration=60)
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat completion from the selected local GGUF model.

    Lazily (re)loads the llama.cpp model whenever the dropdown selection
    changes, replays the Gradio tuple-style ``history`` into the agent's
    chat history, and yields the growing response text chunk by chunk so
    the UI renders it as it streams.
    """
    global llm
    global llm_model

    # (Re)load only when no model is resident or the selection changed;
    # otherwise reuse the already-loaded weights.
    if llm is None or llm_model != model:
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = model

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt="Tu es Napoleon et ne reponds qu'en francais.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    # Copy the UI sliders into the provider's sampling settings.
    sampling = provider.get_provider_default_settings()
    sampling.temperature = temperature
    sampling.top_k = top_k
    sampling.top_p = top_p
    sampling.max_tokens = max_tokens
    sampling.repeat_penalty = repeat_penalty
    sampling.stream = True

    # Replay prior (user, assistant) turns into the agent's history.
    chat_history = BasicChatHistory()
    for user_text, assistant_text in history:
        chat_history.add_message({'role': Roles.user, 'content': user_text})
        chat_history.add_message({'role': Roles.assistant, 'content': assistant_text})

    token_stream = agent.get_chat_response(
        message,
        llm_sampling_settings=sampling,
        chat_history=chat_history,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Accumulate chunks and yield the partial answer for live UI updates.
    partial = ""
    for chunk in token_stream:
        partial += chunk
        yield partial
| |
|
# Chat UI: model selector plus sampling controls, all wired as extra
# inputs to `respond` (after the implicit message/history arguments).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown([
            'napoleon_24b_v0.2-q8_0.gguf',
            'napoleon_24b_v0.1-q8_0.gguf',
            'Napoleon_24B_V0.0.Q8_0.gguf',
        ], value="Napoleon_24B_V0.0.Q8_0.gguf", label="Model"),
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
        gr.Slider(minimum=0.05, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition penalty",
        ),
    ],
    # Dark indigo theme overriding a handful of dark-mode palette slots.
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue", neutral_hue="gray", font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
        body_background_fill_dark="#0f172a",
        block_background_fill_dark="#0f172a",
        block_border_width="2px",
        block_title_background_fill_dark="#070d1b",
        input_background_fill_dark="#0c1425",
        button_secondary_background_fill_dark="#070d1b",
        border_color_accent_dark="#21293b",
        border_color_primary_dark="#21293b",
        background_fill_secondary_dark="#0f172a",
        color_accent_soft_dark="transparent"
    ),
    css=css,
    title="🇫🇷 Napoléon 🇫🇷",
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    # Fix: plain string — the original used an f-string with no placeholders (F541).
    description="This is Napoleon model, a French 24B LLM fine tune from Mistral AI, merged with Dolphin AI.",
    chatbot=gr.Chatbot(
        scale=1,
        placeholder=PLACEHOLDER,
        show_copy_button=True
    ),
    examples=[
        ['Pourquoi les serveurs parisiens sont-ils si "charmants" avec les touristes ?'],
        ['Est-il vrai que les Français font la grève plus souvent qu ils ne travaillent ?'],
    ],
)
| |
|
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
| |
|