Chengdong CAO committed on
Commit
3335d1e
·
1 Parent(s): fe57f0f
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +314 -0
  3. requirements.txt +12 -0
README.md CHANGED
@@ -10,4 +10,4 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
10
  license: mit
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import json
2
+ # import os
3
+
4
+ # import gradio as gr
5
+ # import requests
6
+ # from lagent.schema import AgentStatusCode
7
+
8
+ # os.system("python -m mindsearch.app --lang cn --model_format internlm_silicon &")
9
+
10
+ # PLANNER_HISTORY = []
11
+ # SEARCHER_HISTORY = []
12
+
13
+
14
+ # def rst_mem(history_planner: list, history_searcher: list):
15
+ # '''
16
+ # Reset the chatbot memory.
17
+ # '''
18
+ # history_planner = []
19
+ # history_searcher = []
20
+ # if PLANNER_HISTORY:
21
+ # PLANNER_HISTORY.clear()
22
+ # return history_planner, history_searcher
23
+
24
+
25
+ # def format_response(gr_history, agent_return):
26
+ # if agent_return['state'] in [
27
+ # AgentStatusCode.STREAM_ING, AgentStatusCode.ANSWER_ING
28
+ # ]:
29
+ # gr_history[-1][1] = agent_return['response']
30
+ # elif agent_return['state'] == AgentStatusCode.PLUGIN_START:
31
+ # thought = gr_history[-1][1].split('```')[0]
32
+ # if agent_return['response'].startswith('```'):
33
+ # gr_history[-1][1] = thought + '\n' + agent_return['response']
34
+ # elif agent_return['state'] == AgentStatusCode.PLUGIN_END:
35
+ # thought = gr_history[-1][1].split('```')[0]
36
+ # if isinstance(agent_return['response'], dict):
37
+ # gr_history[-1][
38
+ # 1] = thought + '\n' + f'```json\n{json.dumps(agent_return["response"], ensure_ascii=False, indent=4)}\n```' # noqa: E501
39
+ # elif agent_return['state'] == AgentStatusCode.PLUGIN_RETURN:
40
+ # assert agent_return['inner_steps'][-1]['role'] == 'environment'
41
+ # item = agent_return['inner_steps'][-1]
42
+ # gr_history.append([
43
+ # None,
44
+ # f"```json\n{json.dumps(item['content'], ensure_ascii=False, indent=4)}\n```"
45
+ # ])
46
+ # gr_history.append([None, ''])
47
+ # return
48
+
49
+
50
+ # def predict(history_planner, history_searcher):
51
+
52
+ # def streaming(raw_response):
53
+ # for chunk in raw_response.iter_lines(chunk_size=8192,
54
+ # decode_unicode=False,
55
+ # delimiter=b'\n'):
56
+ # if chunk:
57
+ # decoded = chunk.decode('utf-8')
58
+ # if decoded == '\r':
59
+ # continue
60
+ # if decoded[:6] == 'data: ':
61
+ # decoded = decoded[6:]
62
+ # elif decoded.startswith(': ping - '):
63
+ # continue
64
+ # response = json.loads(decoded)
65
+ # yield (response['response'], response['current_node'])
66
+
67
+ # global PLANNER_HISTORY
68
+ # PLANNER_HISTORY.append(dict(role='user', content=history_planner[-1][0]))
69
+ # new_search_turn = True
70
+
71
+ # url = 'http://localhost:8002/solve'
72
+ # headers = {'Content-Type': 'application/json'}
73
+ # data = {'inputs': PLANNER_HISTORY}
74
+ # raw_response = requests.post(url,
75
+ # headers=headers,
76
+ # data=json.dumps(data),
77
+ # timeout=20,
78
+ # stream=True)
79
+
80
+ # for resp in streaming(raw_response):
81
+ # agent_return, node_name = resp
82
+ # if node_name:
83
+ # if node_name in ['root', 'response']:
84
+ # continue
85
+ # agent_return = agent_return['nodes'][node_name]['detail']
86
+ # if new_search_turn:
87
+ # history_searcher.append([agent_return['content'], ''])
88
+ # new_search_turn = False
89
+ # format_response(history_searcher, agent_return)
90
+ # if agent_return['state'] == AgentStatusCode.END:
91
+ # new_search_turn = True
92
+ # yield history_planner, history_searcher
93
+ # else:
94
+ # new_search_turn = True
95
+ # format_response(history_planner, agent_return)
96
+ # if agent_return['state'] == AgentStatusCode.END:
97
+ # PLANNER_HISTORY = agent_return['inner_steps']
98
+ # yield history_planner, history_searcher
99
+ # return history_planner, history_searcher
100
+
101
+
102
+ # with gr.Blocks() as demo:
103
+ # gr.HTML("""<h1 align="center">MindSearch Gradio Demo</h1>""")
104
+ # gr.HTML("""<p style="text-align: center; font-family: Arial, sans-serif;">MindSearch is an open-source AI Search Engine Framework with Perplexity.ai Pro performance. You can deploy your own Perplexity.ai-style search engine using either closed-source LLMs (GPT, Claude) or open-source LLMs (InternLM2.5-7b-chat).</p>""")
105
+ # gr.HTML("""
106
+ # <div style="text-align: center; font-size: 16px;">
107
+ # <a href="https://github.com/InternLM/MindSearch" style="margin-right: 15px; text-decoration: none; color: #4A90E2;">🔗 GitHub</a>
108
+ # <a href="https://arxiv.org/abs/2407.20183" style="margin-right: 15px; text-decoration: none; color: #4A90E2;">📄 Arxiv</a>
109
+ # <a href="https://huggingface.co/papers/2407.20183" style="margin-right: 15px; text-decoration: none; color: #4A90E2;">📚 Hugging Face Papers</a>
110
+ # <a href="https://huggingface.co/spaces/internlm/MindSearch" style="text-decoration: none; color: #4A90E2;">🤗 Hugging Face Demo</a>
111
+ # </div>
112
+ # """)
113
+ # with gr.Row():
114
+ # with gr.Column(scale=10):
115
+ # with gr.Row():
116
+ # with gr.Column():
117
+ # planner = gr.Chatbot(label='planner',
118
+ # height=700,
119
+ # show_label=True,
120
+ # show_copy_button=True,
121
+ # bubble_full_width=False,
122
+ # render_markdown=True)
123
+ # with gr.Column():
124
+ # searcher = gr.Chatbot(label='searcher',
125
+ # height=700,
126
+ # show_label=True,
127
+ # show_copy_button=True,
128
+ # bubble_full_width=False,
129
+ # render_markdown=True)
130
+ # with gr.Row():
131
+ # user_input = gr.Textbox(show_label=False,
132
+ # placeholder='帮我搜索一下 InternLM 开源体系',
133
+ # lines=5,
134
+ # container=False)
135
+ # with gr.Row():
136
+ # with gr.Column(scale=2):
137
+ # submitBtn = gr.Button('Submit')
138
+ # with gr.Column(scale=1, min_width=20):
139
+ # emptyBtn = gr.Button('Clear History')
140
+
141
+ # def user(query, history):
142
+ # return '', history + [[query, '']]
143
+
144
+ # submitBtn.click(user, [user_input, planner], [user_input, planner],
145
+ # queue=False).then(predict, [planner, searcher],
146
+ # [planner, searcher])
147
+ # emptyBtn.click(rst_mem, [planner, searcher], [planner, searcher],
148
+ # queue=False)
149
+
150
+ # demo.queue()
151
+ # demo.launch(server_name='0.0.0.0',
152
+ # server_port=7860,
153
+ # inbrowser=True,
154
+ # share=True)
155
+
156
+
157
import json
import os
import subprocess
import sys

import gradio as gr
import requests
from lagent.schema import AgentStatusCode

# Launch the MindSearch backend (serves /solve on port 8002) in the
# background.  Use subprocess.Popen with an argument list instead of
# os.system("... &"): no shell involved, and sys.executable guarantees the
# backend runs under the same interpreter as this frontend (a bare "python"
# on PATH may be a different installation).
subprocess.Popen([
    sys.executable, "-m", "mindsearch.app", "--lang", "cn", "--model_format",
    "internlm_silicon"
])

# Conversation state shared across Gradio callbacks.
# PLANNER_HISTORY holds the planner's role/content message list sent to the
# backend; SEARCHER_HISTORY is declared for symmetry (not read below).
PLANNER_HISTORY = []
SEARCHER_HISTORY = []
168
+
169
+
170
def rst_mem(history_planner: list, history_searcher: list):
    """Reset the chatbot memory.

    Clears the module-level planner history and returns a pair of empty
    chat logs so both Chatbot widgets are wiped.  The incoming histories
    are ignored; Gradio replaces them with the returned empty lists.
    """
    if PLANNER_HISTORY:
        PLANNER_HISTORY.clear()
    return [], []
179
+
180
+
181
def format_response(gr_history, agent_return):
    """Render one agent event into the Gradio chat history, in place.

    Streaming states overwrite the last bot message; plugin states attach
    the tool call or tool result as fenced ```json blocks.  ``gr_history``
    is a list of ``[user, bot]`` message pairs as used by ``gr.Chatbot``.
    """
    state = agent_return['state']
    if state in (AgentStatusCode.STREAM_ING, AgentStatusCode.ANSWER_ING):
        # Plain token streaming: replace the last bot message wholesale.
        gr_history[-1][1] = agent_return['response']
    elif state == AgentStatusCode.PLUGIN_START:
        # Keep only the thought before the first code fence, then append
        # the freshly started tool-call block.
        thought = gr_history[-1][1].split('```')[0]
        if agent_return['response'].startswith('```'):
            gr_history[-1][1] = thought + '\n' + agent_return['response']
    elif state == AgentStatusCode.PLUGIN_END:
        thought = gr_history[-1][1].split('```')[0]
        if isinstance(agent_return['response'], dict):
            dumped = json.dumps(
                agent_return['response'], ensure_ascii=False, indent=4)
            gr_history[-1][1] = thought + '\n' + f'```json\n{dumped}\n```'
    elif state == AgentStatusCode.PLUGIN_RETURN:
        # The tool result must be the most recent 'environment' step.
        assert agent_return['inner_steps'][-1]['role'] == 'environment'
        item = agent_return['inner_steps'][-1]
        dumped = json.dumps(item['content'], ensure_ascii=False, indent=4)
        gr_history.append([None, f"```json\n{dumped}\n```"])
        # Start a fresh, empty bot message for the next streamed reply.
        gr_history.append([None, ''])
    return
204
+
205
+
206
def predict(history_planner, history_searcher):
    """Stream one planner turn through the MindSearch backend.

    Appends the latest user query to ``PLANNER_HISTORY``, POSTs it to the
    local ``/solve`` SSE endpoint, and yields updated
    ``(history_planner, history_searcher)`` chat logs as events arrive.
    Events carrying a node name update the searcher pane; top-level events
    update the planner pane.
    """

    def streaming(raw_response):
        """Decode the SSE byte stream into (agent_return, node_name) pairs."""
        for chunk in raw_response.iter_lines(chunk_size=8192,
                                             decode_unicode=False,
                                             delimiter=b'\n'):
            if not chunk:
                continue
            decoded = chunk.decode('utf-8')
            if decoded == '\r':
                continue
            if decoded.startswith('data: '):
                decoded = decoded[len('data: '):]
            elif decoded.startswith(': ping - '):
                # Keep-alive comment from the SSE server; skip it.
                continue
            payload = json.loads(decoded)
            yield payload['response'], payload['current_node']

    global PLANNER_HISTORY
    PLANNER_HISTORY.append(dict(role='user', content=history_planner[-1][0]))
    new_search_turn = True

    raw_response = requests.post('http://localhost:8002/solve',
                                 headers={'Content-Type': 'application/json'},
                                 data=json.dumps({'inputs': PLANNER_HISTORY}),
                                 timeout=20,
                                 stream=True)

    for agent_return, node_name in streaming(raw_response):
        if node_name:
            # Event from a search sub-agent node; 'root'/'response' carry
            # nothing to display.
            if node_name in ('root', 'response'):
                continue
            agent_return = agent_return['nodes'][node_name]['detail']
            if new_search_turn:
                # First event of a search turn opens a new searcher entry.
                history_searcher.append([agent_return['content'], ''])
                new_search_turn = False
            format_response(history_searcher, agent_return)
            if agent_return['state'] == AgentStatusCode.END:
                new_search_turn = True
        else:
            # Event from the top-level planner.
            new_search_turn = True
            format_response(history_planner, agent_return)
            if agent_return['state'] == AgentStatusCode.END:
                # Adopt the planner's full step log as the new history.
                PLANNER_HISTORY = agent_return['inner_steps']
        yield history_planner, history_searcher
    return history_planner, history_searcher
256
+
257
+
258
# Page-level CSS applied to the whole Blocks app.
_CSS = ".gradio-container {background-color: #f9f9f9;} .gr-button {background-color: #4A90E2; color: white; border-radius: 10px;} .gr-textbox {border: 2px solid #4A90E2; border-radius: 5px;} .gr-row {margin-bottom: 10px;}"

# Options shared by both chat panes.
_CHATBOT_KWARGS = dict(height=700,
                       show_label=True,
                       show_copy_button=True,
                       bubble_full_width=True,
                       render_markdown=True,
                       container=False)

with gr.Blocks(css=_CSS) as demo:
    # Header: title, blurb, and project links.
    gr.HTML("""<h1 align="center" style="color:#4A90E2;">MindSearch Gradio Demo</h1>""")
    gr.HTML("""<p style="text-align: center; font-family: Arial, sans-serif; color: #333;">MindSearch is an open-source AI Search Engine Framework with Perplexity.ai Pro performance. You can deploy your own Perplexity.ai-style search engine using either closed-source LLMs (GPT, Claude) or open-source LLMs (InternLM2.5-7b-chat).</p>""")
    gr.HTML("""
    <div style="text-align: center; font-size: 16px; margin-bottom: 20px;">
        <a href="https://github.com/InternLM/MindSearch" style="margin-right: 15px; text-decoration: none; color: #4A90E2;">🔗 GitHub</a>
        <a href="https://arxiv.org/abs/2407.20183" style="margin-right: 15px; text-decoration: none; color: #4A90E2;">📄 Arxiv</a>
        <a href="https://huggingface.co/papers/2407.20183" style="margin-right: 15px; text-decoration: none; color: #4A90E2;">📚 Hugging Face Papers</a>
        <a href="https://huggingface.co/spaces/internlm/MindSearch" style="text-decoration: none; color: #4A90E2;">🤗 Hugging Face Demo</a>
    </div>
    """)
    with gr.Row():
        with gr.Column(scale=10):
            # Two side-by-side panes: planner reasoning and search results.
            with gr.Row():
                with gr.Column():
                    planner = gr.Chatbot(label='Planner',
                                         elem_id="planner",
                                         **_CHATBOT_KWARGS)
                with gr.Column():
                    searcher = gr.Chatbot(label='Searcher',
                                          elem_id="searcher",
                                          **_CHATBOT_KWARGS)
            # Query input below the panes.
            with gr.Row():
                user_input = gr.Textbox(show_label=False,
                                        placeholder='帮我搜索一下 InternLM 开源体系',
                                        lines=5,
                                        container=False)
            with gr.Row():
                with gr.Column(scale=2):
                    submitBtn = gr.Button('Submit', elem_id="submitBtn")
                with gr.Column(scale=1, min_width=20):
                    emptyBtn = gr.Button('Clear History', elem_id="emptyBtn")

    def user(query, history):
        """Move the typed query into the planner log and clear the textbox."""
        return '', history + [[query, '']]

    # Submit: record the user turn, then stream the agent's answer.
    submitBtn.click(user, [user_input, planner], [user_input, planner],
                    queue=False).then(predict, [planner, searcher],
                                      [planner, searcher])
    # Clear: wipe both panes and the planner memory.
    emptyBtn.click(rst_mem, [planner, searcher], [planner, searcher],
                   queue=False)

demo.queue()
demo.launch(server_name='0.0.0.0',
            server_port=7860,
            inbrowser=True,
            share=True)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ duckduckgo_search==5.3.1b1
2
+ einops
3
+ fastapi
4
+ git+https://github.com/InternLM/lagent.git
5
+ gradio
6
+ janus
7
+ lmdeploy
8
+ pyvis
9
+ sse-starlette
10
+ termcolor
11
+ transformers==4.41.0
12
+ uvicorn