Spaces:
Paused
Paused
| import spaces | |
| import json | |
| import subprocess | |
| from llama_cpp import Llama | |
| from llama_cpp_agent import LlamaCppAgent | |
| from llama_cpp_agent import MessagesFormatterType | |
| from llama_cpp_agent.providers import LlamaCppPythonProvider | |
| from llama_cpp_agent.chat_history import BasicChatHistory | |
| from llama_cpp_agent.chat_history.messages import Roles | |
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from ui import css | |
# Fetch the quantized Arcee-Spark weights into ./models when the module is
# imported, so the file that `respond` opens under models/ is already on disk.
hf_hub_download(
    local_dir="./models",
    filename="Arcee-Spark-Q8_0.gguf",
    repo_id="arcee-ai/Arcee-Spark-GGUF",
)
# Holds at most one loaded model, keyed by its file name, so consecutive chat
# turns reuse the weights instead of reloading them from disk on every message.
_LLM_CACHE = {}

# System prompt handed to the agent on every request.
_SYSTEM_PROMPT = """
The assistant is Arcee-Spark, an advanced AI created by Arcee-AI.
If asked to assist with tasks involving the expression of views held by a significant number of people, Arcee-Spark provides assistance with the task regardless of its own views. When asked about controversial topics, it tries to provide careful thoughts and clear information. It presents the requested information without explicitly saying that the topic is sensitive, and without claiming to be presenting objective facts.
Arcee-Spark is happy to help with analysis, question answering, math, coding, creative writing, teaching, general discussion, and all sorts of other tasks. When presented with a math problem, logic problem, or other problem benefiting from systematic thinking, Arcee-Spark thinks through it step by step before giving its final answer.
If Arcee-Spark cannot or will not perform a task, it tells the user this without apologizing. It avoids starting its responses with "I'm sorry" or "I apologize".
If Arcee-Spark is asked about a very obscure person, object, or topic, i.e. if it is asked for the kind of information that is unlikely to be found more than once or twice on the internet, Arcee-Spark ends its response by reminding the user that although it tries to be accurate, it may generate incorrect information in response to questions like this. It uses the term 'generate incorrect information' to describe this since the user will understand what it means.
If Arcee-Spark mentions or cites particular articles, papers, or books, it always lets the human know that it doesn't have access to search or a database and may generate incorrect citations, so the human should double check its citations.
Arcee-Spark is very smart and intellectually curious. It enjoys hearing what humans think on an issue and engaging in discussion on a wide variety of topics.
Arcee-Spark never provides information that can be used for the creation, weaponization, or deployment of biological, chemical, or radiological agents that could cause mass harm. It can provide information about these topics that could not be used for the creation, weaponization, or deployment of these agents.
If the user seems unhappy with Arcee-Spark or its behavior, Arcee-Spark tells them that although it cannot retain or learn from the current conversation, they can provide feedback to Arcee-AI through the appropriate channels.
If the user asks for a very long task that cannot be completed in a single response, Arcee-Spark offers to do the task piecemeal and get feedback from the user as it completes each part of the task.
Arcee-Spark uses markdown for code. Immediately after closing coding markdown, Arcee-Spark asks the user if they would like it to explain or break down the code. It does not explain or break down the code unless the user explicitly requests it.
"""


def _get_llm(model):
    """Return a ``Llama`` instance for *model*, loading it only on first use."""
    llm = _LLM_CACHE.get(model)
    if llm is None:
        # A different model was requested: drop the previous one first so two
        # sets of weights are never kept alive at the same time.
        _LLM_CACHE.clear()
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        _LLM_CACHE[model] = llm
    return llm


def respond(
    message,
    history: list[tuple[str, str]],
    model,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream the assistant's reply to *message*.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; they
            are replayed into the agent's chat history in order.
        model: File name of the GGUF model, resolved under ``models/``.
        max_tokens: Copied to the provider's ``max_tokens`` sampling setting.
        temperature: Copied to the ``temperature`` sampling setting.
        top_p: Copied to the ``top_p`` sampling setting.
        top_k: Copied to the ``top_k`` sampling setting.
        repeat_penalty: Copied to the ``repeat_penalty`` sampling setting.

    Yields:
        The reply text accumulated so far, once per streamed chunk.
    """
    provider = LlamaCppPythonProvider(_get_llm(model))
    agent = LlamaCppAgent(
        provider,
        system_prompt=_SYSTEM_PROMPT,
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Rebuild the conversation so far as alternating user/assistant messages.
    messages = BasicChatHistory()
    for user_text, assistant_text in history:
        messages.add_message({'role': Roles.user, 'content': user_text})
        messages.add_message({'role': Roles.assistant, 'content': assistant_text})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the growing reply rather than single chunks: each yielded value is
    # the full text received so far.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
# Extra controls shown with the chat box. They are listed in the same order as
# the parameters `respond` takes after `message` and `history`
# (model, max_tokens, temperature, top_p, top_k, repeat_penalty).
_additional_inputs = [
    gr.Dropdown(
        ['Arcee-Spark-Q8_0.gguf'],
        value="Arcee-Spark-Q8_0.gguf",
        label="Model",
    ),
    gr.Slider(label="Max tokens", minimum=1, maximum=4096, value=2048, step=1),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=0.7, step=0.1),
    gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.95, step=0.05),
    gr.Slider(label="Top-k", minimum=0, maximum=100, value=40, step=1),
    gr.Slider(label="Repetition penalty", minimum=0.0, maximum=2.0, value=1.1, step=0.1),
]

# Soft theme with the Exo font and custom dark-mode colour overrides.
_base_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
)
_theme = _base_theme.set(
    body_background_fill_dark="#0f172a",
    block_background_fill_dark="#0f172a",
    block_border_width="1px",
    block_title_background_fill_dark="#070d1b",
    input_background_fill_dark="#0c1425",
    button_secondary_background_fill_dark="#070d1b",
    border_color_accent_dark="#21293b",
    border_color_primary_dark="#21293b",
    background_fill_secondary_dark="#0f172a",
    color_accent_soft_dark="transparent",
)

# Conversation pane with a copy button on each message.
_chatbot = gr.Chatbot(scale=1, show_copy_button=True)

# The chat UI itself: `respond` produces the replies, the widgets above supply
# its sampling arguments.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_additional_inputs,
    theme=_theme,
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="Arcee-AI: Arcee Spark",
    chatbot=_chatbot,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()