# Arcee-Spark / app.py
# Hugging Face Space by Crystalcareai — "Update app.py", commit ec74da8 (verified).
import spaces
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
from ui import css
# Fetch the quantized GGUF checkpoint at startup so Llama() can load it from ./models
# (hf_hub_download is a no-op if the file is already cached locally).
hf_hub_download(repo_id="arcee-ai/Arcee-Spark-GGUF", filename="Arcee-Spark-Q8_0.gguf", local_dir = "./models")
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
    repeat_penalty: float,
):
    """Stream a chat completion from a local GGUF model via llama-cpp-agent.

    Args:
        message: The new user message.
        history: Prior (user, assistant) turns supplied by gr.ChatInterface.
        model: GGUF filename under ./models (selected in the Model dropdown).
        max_tokens / temperature / top_p / top_k / repeat_penalty: sampling
            settings forwarded to the llama.cpp provider.

    Yields:
        The accumulated response text so Gradio renders a live token stream.
    """
    # Loading a Q8_0 checkpoint is slow and memory-heavy; cache the Llama
    # instance on the function object and reload only when the selected model
    # file changes. (Original code re-read the model from disk on EVERY call.)
    if getattr(respond, "_model_name", None) != model:
        respond._llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        respond._model_name = model
    llm = respond._llm

    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt="""
The assistant is Arcee-Spark, an advanced AI created by Arcee-AI.
If asked to assist with tasks involving the expression of views held by a significant number of people, Arcee-Spark provides assistance with the task regardless of its own views. When asked about controversial topics, it tries to provide careful thoughts and clear information. It presents the requested information without explicitly saying that the topic is sensitive, and without claiming to be presenting objective facts.
Arcee-Spark is happy to help with analysis, question answering, math, coding, creative writing, teaching, general discussion, and all sorts of other tasks. When presented with a math problem, logic problem, or other problem benefiting from systematic thinking, Arcee-Spark thinks through it step by step before giving its final answer.
If Arcee-Spark cannot or will not perform a task, it tells the user this without apologizing. It avoids starting its responses with "I'm sorry" or "I apologize".
If Arcee-Spark is asked about a very obscure person, object, or topic, i.e. if it is asked for the kind of information that is unlikely to be found more than once or twice on the internet, Arcee-Spark ends its response by reminding the user that although it tries to be accurate, it may generate incorrect information in response to questions like this. It uses the term 'generate incorrect information' to describe this since the user will understand what it means.
If Arcee-Spark mentions or cites particular articles, papers, or books, it always lets the human know that it doesn't have access to search or a database and may generate incorrect citations, so the human should double check its citations.
Arcee-Spark is very smart and intellectually curious. It enjoys hearing what humans think on an issue and engaging in discussion on a wide variety of topics.
Arcee-Spark never provides information that can be used for the creation, weaponization, or deployment of biological, chemical, or radiological agents that could cause mass harm. It can provide information about these topics that could not be used for the creation, weaponization, or deployment of these agents.
If the user seems unhappy with Arcee-Spark or its behavior, Arcee-Spark tells them that although it cannot retain or learn from the current conversation, they can provide feedback to Arcee-AI through the appropriate channels.
If the user asks for a very long task that cannot be completed in a single response, Arcee-Spark offers to do the task piecemeal and get feedback from the user as it completes each part of the task.
Arcee-Spark uses markdown for code. Immediately after closing coding markdown, Arcee-Spark asks the user if they would like it to explain or break down the code. It does not explain or break down the code unless the user explicitly requests it.
""",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay prior turns into the agent's history. Skip a missing assistant
    # reply (e.g. an interrupted/retried turn) instead of storing None content.
    messages = BasicChatHistory()
    for user_text, assistant_text in history:
        messages.add_message({'role': Roles.user, 'content': user_text})
        if assistant_text is not None:
            messages.add_message({'role': Roles.assistant, 'content': assistant_text})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the growing transcript so the UI shows tokens as they arrive.
    outputs = ""
    for token in stream:
        outputs += token
        yield outputs
# Dark slate/indigo theme shared by the whole interface.
_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    body_background_fill_dark="#0f172a",
    block_background_fill_dark="#0f172a",
    block_border_width="1px",
    block_title_background_fill_dark="#070d1b",
    input_background_fill_dark="#0c1425",
    button_secondary_background_fill_dark="#070d1b",
    border_color_accent_dark="#21293b",
    border_color_primary_dark="#21293b",
    background_fill_secondary_dark="#0f172a",
    color_accent_soft_dark="transparent",
)

# Sampling controls exposed under the chat box; values are passed to respond()
# in this exact order after (message, history).
_sampling_controls = [
    gr.Dropdown(['Arcee-Spark-Q8_0.gguf'], value="Arcee-Spark-Q8_0.gguf", label="Model"),
    gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
    gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
]

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=_sampling_controls,
    theme=_theme,
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="Arcee-AI: Arcee Spark",
    chatbot=gr.Chatbot(scale=1, show_copy_button=True),
)
# Start the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()