pabloce committed on
Commit
a80e251
·
verified ·
1 Parent(s): a98a720

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +237 -0
app.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import logging
3
+ import gradio as gr
4
+ from huggingface_hub import hf_hub_download
5
+
6
+ from llama_cpp import Llama
7
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
8
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
9
+ from llama_cpp_agent.chat_history import BasicChatHistory
10
+ from llama_cpp_agent.chat_history.messages import Roles
11
+ from llama_cpp_agent.llm_output_settings import (
12
+ LlmStructuredOutputSettings,
13
+ LlmStructuredOutputType,
14
+ )
15
+ from llama_cpp_agent.tools import WebSearchTool, GoogleWebSearchProvider
16
+ from llama_cpp_agent.prompt_templates import web_search_system_prompt, research_system_prompt
17
+ from lib.ui import css, PLACEHOLDER
18
+ from lib.utils import CitingSources
19
+ from lib.settings import get_context_by_model, get_messages_formatter_type
20
+
21
+ llm = None
22
+ llm_model = None
23
+
24
+ hf_hub_download(
25
+ repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
26
+ filename="Mistral-7B-Instruct-v0.3-Q6_K.gguf",
27
+ local_dir="./models"
28
+ )
29
+ hf_hub_download(
30
+ repo_id="bartowski/cognitivecomputations_Dolphin3.0-Mistral-24B-GGUF",
31
+ filename="cognitivecomputations_Dolphin3.0-Mistral-24B-Q8_0.gguf",
32
+ local_dir = "./models"
33
+ )
34
+ hf_hub_download(
35
+ repo_id="bartowski/gemma-2-27b-it-GGUF",
36
+ filename="gemma-2-27b-it-Q8_0.gguf",
37
+ local_dir = "./models"
38
+ )
39
+
40
+ examples = [
41
+ ["latest news about Yann LeCun"],
42
+ ["Latest news site:github.blog"],
43
+ ["Where I can find best hotel in Galapagos, Ecuador intitle:hotel"],
44
+ ["filetype:pdf intitle:python"]
45
+ ]
46
+
47
+ def write_message_to_user():
48
+ """
49
+ Let you write a message to the user.
50
+ """
51
+ return "Please write the message to the user."
52
+
53
+
54
+ @spaces.GPU(duration=120)
55
+ def respond(
56
+ message,
57
+ history: list[tuple[str, str]],
58
+ model,
59
+ system_message,
60
+ max_tokens,
61
+ temperature,
62
+ top_p,
63
+ top_k,
64
+ repeat_penalty,
65
+ ):
66
+ global llm
67
+ global llm_model
68
+ chat_template = get_messages_formatter_type(model)
69
+ if llm is None or llm_model != model:
70
+ llm = Llama(
71
+ model_path=f"models/{model}",
72
+ flash_attn=True,
73
+ n_gpu_layers=81,
74
+ n_batch=1024,
75
+ n_ctx=get_context_by_model(model),
76
+ )
77
+ llm_model = model
78
+ provider = LlamaCppPythonProvider(llm)
79
+ logging.info(f"Loaded chat examples: {chat_template}")
80
+ search_tool = WebSearchTool(
81
+ llm_provider=provider,
82
+ web_search_provider=GoogleWebSearchProvider(),
83
+ message_formatter_type=chat_template,
84
+ max_tokens_search_results=12000,
85
+ max_tokens_per_summary=2048,
86
+ )
87
+
88
+ web_search_agent = LlamaCppAgent(
89
+ provider,
90
+ system_prompt=web_search_system_prompt,
91
+ predefined_messages_formatter_type=chat_template,
92
+ debug_output=True,
93
+ )
94
+
95
+ answer_agent = LlamaCppAgent(
96
+ provider,
97
+ system_prompt=research_system_prompt,
98
+ predefined_messages_formatter_type=chat_template,
99
+ debug_output=True,
100
+ )
101
+
102
+ settings = provider.get_provider_default_settings()
103
+ settings.stream = False
104
+ settings.temperature = temperature
105
+ settings.top_k = top_k
106
+ settings.top_p = top_p
107
+
108
+ settings.max_tokens = max_tokens
109
+ settings.repeat_penalty = repeat_penalty
110
+
111
+ output_settings = LlmStructuredOutputSettings.from_functions(
112
+ [search_tool.get_tool()]
113
+ )
114
+
115
+ messages = BasicChatHistory()
116
+
117
+ for msn in history:
118
+ user = {"role": Roles.user, "content": msn[0]}
119
+ assistant = {"role": Roles.assistant, "content": msn[1]}
120
+ messages.add_message(user)
121
+ messages.add_message(assistant)
122
+
123
+ result = web_search_agent.get_chat_response(
124
+ message,
125
+ llm_sampling_settings=settings,
126
+ structured_output_settings=output_settings,
127
+ add_message_to_chat_history=False,
128
+ add_response_to_chat_history=False,
129
+ print_output=False,
130
+ )
131
+
132
+ outputs = ""
133
+
134
+ settings.stream = True
135
+ response_text = answer_agent.get_chat_response(
136
+ f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" +
137
+ result[0]["return_value"],
138
+ role=Roles.tool,
139
+ llm_sampling_settings=settings,
140
+ chat_history=messages,
141
+ returns_streaming_generator=True,
142
+ print_output=False,
143
+ )
144
+
145
+ for text in response_text:
146
+ outputs += text
147
+ yield outputs
148
+
149
+ output_settings = LlmStructuredOutputSettings.from_pydantic_models(
150
+ [CitingSources], LlmStructuredOutputType.object_instance
151
+ )
152
+
153
+ citing_sources = answer_agent.get_chat_response(
154
+ "Cite the sources you used in your response.",
155
+ role=Roles.tool,
156
+ llm_sampling_settings=settings,
157
+ chat_history=messages,
158
+ returns_streaming_generator=False,
159
+ structured_output_settings=output_settings,
160
+ print_output=False,
161
+ )
162
+ outputs += "\n\nSources:\n"
163
+ outputs += "\n".join(citing_sources.sources)
164
+ yield outputs
165
+
166
+
167
+ demo = gr.ChatInterface(
168
+ respond,
169
+ additional_inputs=[
170
+ gr.Dropdown([
171
+ 'cognitivecomputations_Dolphin3.0-Mistral-24B-Q8_0.gguf',
172
+ 'Mistral-7B-Instruct-v0.3-Q6_K.gguf',
173
+ 'gemma-2-27b-it-Q8_0.gguf'
174
+ ],
175
+ value="Mistral-7B-Instruct-v0.3-Q6_K.gguf",
176
+ label="Model"
177
+ ),
178
+ gr.Textbox(value=web_search_system_prompt, label="System message"),
179
+ gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
180
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.45, step=0.1, label="Temperature"),
181
+ gr.Slider(
182
+ minimum=0.1,
183
+ maximum=1.0,
184
+ value=0.95,
185
+ step=0.05,
186
+ label="Top-p",
187
+ ),
188
+ gr.Slider(
189
+ minimum=0,
190
+ maximum=100,
191
+ value=40,
192
+ step=1,
193
+ label="Top-k",
194
+ ),
195
+ gr.Slider(
196
+ minimum=0.0,
197
+ maximum=2.0,
198
+ value=1.1,
199
+ step=0.1,
200
+ label="Repetition penalty",
201
+ ),
202
+ ],
203
+ theme=gr.themes.Soft(
204
+ primary_hue="blue",
205
+ secondary_hue="blue",
206
+ neutral_hue="gray",
207
+ font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
208
+ body_background_fill_dark="#1f1f1f",
209
+ block_background_fill_dark="#1f1f1f",
210
+ block_border_width="1px",
211
+ block_title_background_fill_dark="#1f1f1f",
212
+ input_background_fill_dark="#202124",
213
+ button_secondary_background_fill_dark="#202124",
214
+ border_color_accent_dark="#3b3c3f",
215
+ border_color_primary_dark="#3b3c3f",
216
+ background_fill_secondary_dark="#1f1f1f",
217
+ color_accent_soft_dark="transparent",
218
+ code_background_fill_dark="#202124"
219
+ ),
220
+ css=css,
221
+ retry_btn="Retry",
222
+ undo_btn="Undo",
223
+ clear_btn="Clear",
224
+ submit_btn="Send",
225
+ cache_examples=False,
226
+ examples = (examples),
227
+ description="Llama-cpp-agent: Chat with Google Agent",
228
+ analytics_enabled=False,
229
+ chatbot=gr.Chatbot(
230
+ scale=1,
231
+ placeholder=PLACEHOLDER,
232
+ show_copy_button=True
233
+ )
234
+ )
235
+
236
+ if __name__ == "__main__":
237
+ demo.launch()