Omnibus committed on
Commit
135fc23
·
1 Parent(s): 2851e9c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +519 -0
app.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Hugging Face's logo
2
+ Hugging Face
3
+ Search models, datasets, users...
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Docs
8
+ Solutions
9
+ Pricing
10
+
11
+
12
+
13
+ Spaces:
14
+
15
+ Omnibus
16
+ /
17
+ Find-it-Auto
18
+
19
+
20
+ like
21
+ 0
22
+
23
+ Logs
24
+ App
25
+ Files
26
+ Community
27
+ Settings
28
+ Find-it-Auto
29
+ /
30
+ app.py
31
+ Omnibus's picture
32
+ Omnibus
33
+ Update app.py
34
+ f2ec3b4
35
+ 1 day ago
36
+ raw
37
+ history
38
+ blame
39
+ edit
40
+ delete
41
+ 15.2 kB
42
+ import gradio as gr
43
+ import urllib.request
44
+ import requests
45
+ import bs4
46
+ import lxml
47
+ import os
48
+ #import subprocess
49
+ from huggingface_hub import InferenceClient,HfApi
50
+ import random
51
+ import json
52
+ import datetime
53
+ #from query import tasks
54
+ from prompts import (
55
+ FINDER,
56
+ COMPRESS_HISTORY_PROMPT,
57
+ COMPRESS_DATA_PROMPT,
58
+ COMPRESS_DATA_PROMPT_SMALL,
59
+ LOG_PROMPT,
60
+ LOG_RESPONSE,
61
+ PREFIX,
62
+ TASK_PROMPT,
63
+ )
64
# Hub client used by the SEARCH tool to list models.
api=HfApi()


# Inference endpoint the agent prompts on every step.
client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)
71
+
72
def parse_action(string: str):
    """Parse one agent output line of the form 'action: NAME action_input=VALUE'.

    Returns:
        (action_name, action_input) — action_input is None when the line
        carries no 'action_input=' clause.

    Raises:
        ValueError: if the line does not start with 'action:'.
    """
    print("PARSING:")
    print(string)
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if not string.startswith("action:"):
        raise ValueError(f"not an action line: {string!r}")
    # Derive offsets from the markers themselves instead of magic 8 / 13.
    head = len("action: ")
    marker = "action_input="
    idx = string.find(marker)
    print(idx)
    if idx == -1:
        # No explicit input -- everything after 'action: ' is the name.
        print("idx == -1")
        print(string[head:])
        return string[head:], None
    # Name is the text between the prefix and the marker (minus the
    # separating space); the input has surrounding quotes stripped.
    action_name = string[head : idx - 1]
    action_input = string[idx + len(marker):].strip("'").strip('"')
    print("last return:")
    print(action_name)
    print(action_input)
    return action_name, action_input
87
+
88
+
89
+
90
# Print prompts, responses, and intermediate agent state to stdout.
VERBOSE = True
# Compress the running history once it exceeds this many lines.
MAX_HISTORY = 100
# Size threshold above which search/scrape results are chunk-compressed.
MAX_DATA = 1000
93
+
94
def format_prompt(message, history):
    """Render the chat history plus the new message in Mixtral-instruct format."""
    pieces = ["<s>"]
    for user_turn, assistant_turn in history:
        pieces.append(f"[INST] {user_turn} [/INST]")
        pieces.append(f" {assistant_turn}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
101
+
102
def call_search(purpose, task, history, action_input):
    """Tool: search the Hugging Face Hub for models matching `action_input`.

    Returns the standard agent tuple (next_action, action_input, history, task).
    On failure (or a missing query) the agent is steered back to MAIN /
    UPDATE-TASK with a usage-hint observation.
    """
    return_list = []
    print(action_input)
    print("trying")
    try:
        if action_input is not None and action_input != "":
            # Bug fix: str.strip returns a new string; the original
            # discarded the result, so quotes were never removed.
            action_input = action_input.strip('"')
            model_list = api.list_models(filter=f"{action_input}")
            this_obj = list(model_list)
            print(f'THIS_OBJ :: {this_obj[0]}')
            for model in this_obj:
                return_list.append({
                    "id": model.id,
                    "author": model.author,
                    "created_at": model.created_at,
                    "last_modified": model.last_modified,
                    "private": model.private,
                    "gated": model.gated,
                    "disabled": model.disabled,
                    "downloads": model.downloads,
                    "likes": model.likes,
                    "library_name": model.library_name,
                    "tags": model.tags,
                    "pipeline_tag": model.pipeline_tag,
                })
            rl = len(return_list)
            print(rl)
            # Rough size proxy: count separators in the serialized results.
            c = sum(1 for ch in str(return_list) if ch in (" ", ","))
            print(c)
            if rl > MAX_DATA:
                print("compressing...")
                return_list = compress_data(rl, purpose, task, return_list)
            history = "observation: the search results are:\n {}\n".format(return_list)
            return "MAIN", None, history, task
        else:
            history = "observation: I need to trigger a search using the following syntax:\naction: SEARCH action_input=URL\n"
            return "UPDATE-TASK", None, history, task
    except Exception as e:
        print(e)
        history = "observation: I need to trigger a search using the following syntax:\naction: SEARCH action_input=URL\n"
        return "UPDATE-TASK", None, history, task
156
+
157
+
158
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Stream a completion from the inference client and return the full text.

    The prompt is PREFIX (timestamp/purpose filled in) followed by
    `prompt_template` formatted with `prompt_kwargs`.
    """
    timestamp = datetime.datetime.now()
    print(seed)

    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(timestamp=timestamp, purpose=purpose)
    content += prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    # Stream token-by-token and accumulate the pieces into one string.
    stream = client.text_generation(
        content, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    resp = "".join(chunk.token.text for chunk in stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
198
+
199
def compress_data(c, purpose, task, history):
    """Summarize an oversized result set in MAX_DATA-sized slices.

    Args:
        c: approximate size of `history` (item or separator count).
        purpose, task: threaded into the compression prompt.
        history: sliceable data (string or list) to compress.

    Returns:
        An 'observation: ...' string holding the final summary.
    """
    seed = random.randint(1, 1000000000)
    print(c)
    c = int(c)
    if c <= 0:
        # Robustness: the original divided by c/MAX_DATA, so c == 0 raised
        # ZeroDivisionError.
        return "observation: \n"
    divr = c / MAX_DATA
    # Number of slices: ceil(c / MAX_DATA).
    divi = int(divr) + 1 if divr != int(divr) else int(divr)
    chunk = int(c / divr)  # == MAX_DATA items per slice
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')
    out = []
    s = 0
    e = chunk
    print(f'e:: {e}')
    new_history = ""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    resp = ""
    for z in range(divi):
        print(f's:e :: {s}:{e}')
        hist = history[s:e]
        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=hist,
        )
        # Each pass folds the previous summary in as `knowledge`.
        new_history = resp
        print(resp)
        # Bug fix: `out += resp` spliced the string into the list one
        # character at a time; append keeps each summary intact.
        out.append(resp)
        e += chunk
        s += chunk
    print("final" + resp)
    history = "observation: {}\n".format(resp)
    return history
252
+
253
+
254
+
255
+
256
def compress_history(purpose, task, history):
    """Condense a long agent history into a single observation line."""
    summary = run_gpt(
        COMPRESS_HISTORY_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=512,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    return f"observation: {summary}\n"
268
+
269
+
270
def call_main(purpose, task, history, action_input):
    """Main agent step: query the model and route on its first action line.

    Returns the standard agent tuple (next_action, action_input, history, task).
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:", "action:"],
        max_tokens=512,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    lines = resp.strip().strip("\n").split("\n")
    for line in lines:
        if line == "":
            continue
        # COMPLETE must be checked before the generic action: prefix.
        if line.startswith("action: COMPLETE"):
            print("COMPLETE called")
            return "COMPLETE", None, history, task
        if line.startswith("action:"):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            history += "{}\n".format(line)
            return action_name, action_input, history, task
        # Bug fix: the original's separate `if thought:` branch plus the
        # trailing `else` appended thought lines to history twice.
        history += "{}\n".format(line)
    # Bug fix: the original tested the literal string "VERBOSE" (always
    # truthy) instead of the VERBOSE flag.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task
302
+
303
+
304
def call_set_task(purpose, task, history, action_input):
    """Ask the model to (re)write the current task, then hand back to MAIN."""
    new_task = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    ).strip("\n")
    history += "observation: task has been updated to: {}\n".format(new_task)
    return "MAIN", None, history, new_task
316
+
317
+
318
+
319
+ ###########################################################
320
def search_all(url):
    """Placeholder search hook; currently always returns an empty source string."""
    return ""
323
+
324
+
325
+
326
def find_all(purpose, task, history, url):
    """Tool: fetch `url`, scrape its text and common tags into an observation.

    Returns the standard agent tuple (next_action, action_input, history, task).
    On failure (or a missing URL) the agent gets a usage-hint observation.
    """
    print(url)
    print(f"trying URL:: {url}")
    try:
        if url is not None and url != "":
            out = []
            source = requests.get(url)
            soup = bs4.BeautifulSoup(source.content, 'lxml')
            # Debug peek at the page structure.
            print(soup.title)
            print(soup.title.name)
            print(soup.title.string)
            print(soup.title.parent.name)
            print([tag.name for tag in soup.find_all()])
            out.append(f'RAW TEXT RETURNED: {soup.text}')
            for p in soup.find_all(("a", "p", "span", "content", "article")):
                out.append([{p.name: p.string,
                             "parent": p.parent.name,
                             "previous": p.previous,
                             "first-child": [b.name for b in p.children],
                             "content": p}])
            out = str(out)
            rl = len(out)
            print(f'rl:: {rl}')
            # Rough size proxy: separators/newlines in the serialized scrape.
            c = sum(1 for ch in out if ch in (" ", ",", "\n"))
            print(f'c:: {c}')
            if rl > MAX_DATA:
                print("compressing...")
                # Bug fix: the original computed the compressed summary into
                # `rawp` but then put the uncompressed `out` in the
                # observation, discarding the compression entirely.
                out = compress_data(c, purpose, task, out)
            print(f'out:: {out}')
            history += "observation: the search results are:\n {}\n".format(out)
            task = "complete?"
            return "MAIN", None, history, task
        else:
            history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
            return "MAIN", None, history, task
    except Exception as e:
        print(e)
        history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
        return "MAIN", None, history, task
382
+
383
+
384
def find_it(url, q=None, num=None):
    """Scrape `url` and collect every `q` tag, optionally reading attribute `num`.

    Args:
        url: page to fetch.
        q: tag name to collect (e.g. 'a', 'p').
        num: optional attribute name whose value is recorded per tag.

    Returns:
        (out, out_l): `out` is a list of per-tag detail dicts; `out_l` holds
        each tag's string, falling back to the `num` attribute value.
    """
    out = []
    out_l = []
    z = ""
    source = urllib.request.urlopen(url).read()
    soup = bs4.BeautifulSoup(source, 'lxml')

    for p in soup.find_all(f'{q}'):
        # Bug fix: the original tested `num != ""`, so the default num=None
        # looked up the literal attribute name 'None' on every tag.
        if num:
            z = p.get(f'{num}')
        out.append([{q: p.string,
                     "additional": z,
                     "parent": p.parent.name,
                     "previous": [b for b in p.previous],
                     "first-child": [b.name for b in p.children],
                     "content": p}])
        if p.string is not None:
            out_l.append(p.string)
        else:
            out_l.append(z)
        print(p.parent.name)
    # NOTE: the original loop variable shadowed the `url` parameter here.
    for link in soup.find_all('a'):
        print(link.get('href'))

    return out, out_l
415
+
416
def find_it2(url):
    """Fetch `url` and return the text of all of its anchor tags.

    Returns the joined link text on success, or the exception object on
    failure (preserving the original best-effort contract).
    """
    try:
        # Bug fix: the original passed bogus kwargs (a1/q2/q3) to
        # requests.get, raising TypeError before any request was made; the
        # call is also moved inside the try so network errors are handled.
        response = requests.get(url)
        response.raise_for_status()
        # Bug fix: BeautifulSoup was referenced unqualified but only
        # imported as bs4.BeautifulSoup in this module.
        soup = bs4.BeautifulSoup(response.content, 'lxml')
        # Bug fix: 'URL Links:\n' was used as the join *separator*; it was
        # clearly intended as a header over newline-joined link texts.
        out = 'URL Links:\n' + '\n'.join(p.text for p in soup.find_all('a'))
        return out
    except Exception as e:
        print(e)
        return e
426
+ #################################
427
+
428
# Dispatch table mapping an action name (parsed by parse_action) to the
# tool that handles it. Both search-style actions currently share the same
# scraper implementation (find_all).
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}
434
+
435
+
436
def run_action(purpose, task, history, action_name, action_input):
    """Dispatch one agent step to the tool registered under `action_name`.

    Compresses the history first when it grows past MAX_HISTORY lines.
    Returns the standard agent tuple (next_action, action_input, history, task).
    """
    if action_name == "COMPLETE":
        print("Complete - Exiting")
        return "COMPLETE", None, history, task

    # compress the history when it is long
    if len(history.split("\n")) > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)
    # (The original re-asserted membership inside this branch -- redundant.)
    if action_name in NAME_TO_FUNC:
        print(f"RUN: {action_name} ACTION_INPUT: {action_input}")
        return NAME_TO_FUNC[action_name](purpose, task, history, action_input)
    # Bug fix: the hint listed WEBSITE_SCRAPE, which is not a registered
    # tool; the dispatch table's key is SCRAPE_WEBSITE.
    history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
    return "MAIN", None, history, task
457
+
458
def run(purpose,history,data=None,file=None,url=None,pdf_url=None,pdf_batch=None):
    """Top-level agent loop driven by the Gradio button.

    Repeatedly dispatches actions via run_action until COMPLETE, yielding
    the growing history string after each step.

    NOTE(review): the incoming `history` argument and the data/file/url/
    pdf_* inputs are currently ignored -- history is reset to "" below,
    even though the UI wiring passes all of them. Confirm intent.
    """
    task=None
    # NOTE(review): this discards the chatbot history passed in by Gradio.
    history = ""
    #if not history:
    #    history = []
    # First step always asks the model to set a task, since task is None.
    action_name = "UPDATE-TASK" if task is None else "MAIN"
    action_input = None
    while True:
        print("")
        print("")
        print("---")
        print("purpose:", purpose)
        print("task:", task)
        print("---")
        #print(history)
        print("---")

        action_name, action_input, history, task = run_action(
            purpose,
            task,
            history,
            action_name,
            action_input,
        )
        # Stream the updated history back to the UI after every step.
        yield history
        if action_name == "COMPLETE":
            return history
485
+
486
+
487
+
488
+
489
def clear_fn():
    """Clear-button handler: reset the prompt box and chatbot history.

    Bug fix: clear_fn was wired to the Clear button below but never
    defined, so the script raised NameError at startup.
    """
    return "", []


# Gradio UI: prompt + chatbot on top, input tabs below, Go/Stop/Clear controls.
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3>""")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            button = gr.Button()

    with gr.Row():
        stop_button = gr.Button("Stop")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        with gr.Tab("Text"):
            data = gr.Textbox(label="Input Data (paste text)", lines=6)
        with gr.Tab("File"):
            file = gr.Files(label="Input File (.pdf .txt)")
        with gr.Tab("Raw HTML"):
            url = gr.Textbox(label="URL")
        with gr.Tab("PDF URL"):
            pdf_url = gr.Textbox(label="PDF URL")
        with gr.Tab("PDF Batch"):
            pdf_batch = gr.Textbox(label="PDF Batch (comma separated)")
    e_box = gr.Textbox()
    clear_btn.click(clear_fn, None, [prompt, chatbot])
    # NOTE(review): run yields a single history string but three outputs are
    # wired here -- confirm against run's yield shape.
    go = button.click(run, [prompt, chatbot, data, file, url, pdf_url, pdf_batch], [prompt, chatbot, e_box])
    stop_button.click(None, None, None, cancels=[go])
app.launch(server_port=7860, show_api=False)