| import gradio as gr |
| import urllib.request |
| import requests |
| import bs4 |
| import lxml |
| import os |
| |
| from huggingface_hub import InferenceClient,HfApi |
| import random |
| import json |
| import datetime |
| |
| from agent import ( |
| FINDER, |
| COMPRESS_HISTORY_PROMPT, |
| COMPRESS_DATA_PROMPT, |
| COMPRESS_DATA_PROMPT_SMALL, |
| LOG_PROMPT, |
| LOG_RESPONSE, |
| PREFIX, |
| TASK_PROMPT, |
| ) |
# Hub API handle; call_search uses it to list models.
api = HfApi()

# Streaming text-generation client used by run_gpt.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
|
def parse_action(string: str):
    """Split an ``action: NAME action_input=VALUE`` line into its parts.

    Args:
        string: A single line that starts with ``action:``.

    Returns:
        ``(name, value)``. ``value`` is ``None`` when the line carries no
        ``action_input=`` marker; otherwise it is the text after the marker
        with surrounding whitespace and quote characters removed.

    Raises:
        ValueError: If *string* does not start with ``action:``.
    """
    print("PARSING:")
    print(string)

    prefix = "action:"
    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if not string.startswith(prefix):
        raise ValueError(f"expected a line starting with {prefix!r}, got {string!r}")

    # Partition on the marker rather than slicing at fixed offsets, so the
    # result does not depend on exactly one space following ``action:``.
    name, marker, raw_value = string[len(prefix):].partition("action_input=")
    name = name.strip()

    if not marker:
        print("no action_input found")
        print(name)
        return name, None

    value = raw_value.strip().strip("'").strip('"')
    print("last return:")
    print(name)
    print(value)
    return name, value
|
|
|
|
|
|
# When true, prompts, responses and history-compression notices are printed.
VERBOSE = True
# run_action compresses the history once it has more lines than this.
MAX_HISTORY = 100
# Size threshold and chunk divisor used when compressing large data.
MAX_DATA = 20000
|
|
def format_prompt(message, history):
    """Render a chat transcript plus a new message as an ``[INST]`` prompt.

    Args:
        message: The new user message, placed last in the prompt.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        A string starting with ``<s>``, followed by one
        ``[INST] user [/INST] bot</s> `` segment per pair, followed by the
        new message wrapped in ``[INST] ... [/INST]``.
    """
    past_turns = "".join(
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in history
    )
    return "<s>" + past_turns + f"[INST] {message} [/INST]"
|
|
def call_search(purpose, task, history, action_input):
    """List Hub models matching *action_input* and report them as an observation.

    Args:
        purpose: Overall purpose, forwarded to ``compress_data`` if needed.
        task: Current task; returned unchanged.
        history: Ignored on input; the returned history is a fresh observation.
        action_input: Filter string passed to ``api.list_models``.

    Returns:
        ``(next_action, None, history, task)``. ``next_action`` is ``"MAIN"``
        after a completed lookup and ``"UPDATE-TASK"`` when the input is
        missing/empty or the lookup raised.
    """
    retry_observation = (
        "observation: I need to trigger a search using the following syntax:\n"
        "action: SEARCH action_input=URL\n"
    )
    print(action_input)
    print("trying")

    # The original called ``strip`` and threw the result away; keep it so
    # stray double quotes never reach the Hub filter. Non-string input is
    # treated as missing.
    query = action_input.strip('"') if isinstance(action_input, str) else ""
    if not query:
        return "UPDATE-TASK", None, retry_observation, task

    try:
        models = list(api.list_models(filter=f"{query}"))
        # An empty result is a valid answer, not an error: only peek at the
        # first entry when there is one.
        if models:
            print(f"THIS_OBJ :: {models[0]}")

        return_list = [
            {
                "id": model.id,
                "author": model.author,
                "created_at": model.created_at,
                "last_modified": model.last_modified,
                "private": model.private,
                "gated": model.gated,
                "disabled": model.disabled,
                "downloads": model.downloads,
                "likes": model.likes,
                "library_name": model.library_name,
                "tags": model.tags,
                "pipeline_tag": model.pipeline_tag,
            }
            for model in models
        ]

        rl = len(return_list)
        print(rl)
        # Debug only: number of spaces and commas in the rendered list.
        print(sum(ch in " ," for ch in str(return_list)))

        if rl > MAX_DATA:
            print("compressing...")
            return_list = compress_data(rl, purpose, task, return_list)
        history = "observation: the search results are:\n {}\n".format(return_list)
        return "MAIN", None, history, task
    except Exception as e:
        # Boundary handler: any Hub/client failure becomes a retry hint
        # instead of crashing the agent loop.
        print(e)
        return "UPDATE-TASK", None, retry_observation, task
|
|
|
|
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Format a prompt, stream a completion from the client, and return it.

    Args:
        prompt_template: Format string filled with ``prompt_kwargs``.
        stop_tokens: Accepted for interface compatibility; this function
            does not forward it to the client.
        max_tokens: Passed to the client as ``max_new_tokens``.
        seed: Sampling seed passed to the client.
        purpose: Inserted into ``PREFIX`` together with the current timestamp.
        **prompt_kwargs: Values substituted into ``prompt_template``.

    Returns:
        The concatenated text of all streamed tokens.
    """
    now = datetime.datetime.now()
    print(seed)

    sampling = {
        "temperature": 0.9,
        "max_new_tokens": max_tokens,
        "top_p": 0.95,
        "repetition_penalty": 1.0,
        "do_sample": True,
        "seed": seed,
    }

    header = PREFIX.format(timestamp=now, purpose=purpose)
    content = header + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    stream = client.text_generation(
        content,
        **sampling,
        stream=True,
        details=True,
        return_full_text=False,
    )
    resp = "".join(piece.token.text for piece in stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
|
|
def compress_data(c, purpose, task, history):
    """Summarize *history* in ``MAX_DATA``-sized slices via the model.

    Each slice is sent to ``run_gpt`` together with the summary produced for
    the previous slice (as ``knowledge``), so the last response is the
    running summary of everything seen.

    Args:
        c: Length of *history* (anything ``int()`` accepts); determines how
            many slices are processed.
        purpose: Overall purpose, embedded in the compression task text.
        task: Current task, embedded in the compression task text.
        history: Sliceable data (string or list) to compress.

    Returns:
        ``"observation: <summary>\\n"``. When ``c`` is zero or negative there
        is nothing to summarize, so the summary is empty and the model is
        not called.
    """
    total = int(c)
    print(total)
    if total <= 0:
        # The original divided by zero here; an empty observation is the
        # natural result for empty data.
        return "observation: \n"

    seed = random.randint(1, 1000000000)

    # Integer ceiling division instead of float math: the slice width is
    # exactly MAX_DATA and the slices always cover all ``total`` items.
    chunk = MAX_DATA
    divi = -(-total // chunk)
    print(f'chunk:: {chunk}')
    print(f'divi:: {divi}')

    new_history = ""
    resp = ""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    for z in range(divi):
        s = z * chunk
        e = s + chunk
        print(f's:e :: {s}:{e}')

        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=history[s:e],
        )
        # Carry the running summary into the next slice's prompt.
        new_history = resp
        print(resp)

    print("final" + resp)
    return "observation: {}\n".format(resp)
|
|
|
|
|
|
|
|
def compress_history(purpose, task, history):
    """Ask the model to condense *history* and return it as one observation.

    Args:
        purpose: Overall purpose passed to the prompt.
        task: Current task passed to the prompt.
        history: The history text to condense.

    Returns:
        ``"observation: <model response>\\n"``.
    """
    summary = run_gpt(
        COMPRESS_HISTORY_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=512,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    return f"observation: {summary}\n"
|
|
|
|
def call_main(purpose, task, history, action_input):
    """Run one reasoning step and extract the next action from the reply.

    The model reply is read line by line. Lines before the first
    ``action: `` line are appended to the history once each; the first
    action line ends the step.

    Args:
        purpose: Overall purpose passed to the prompt.
        task: Current task; returned unchanged.
        history: History text so far; extended with the reply lines.
        action_input: Unused on input; overwritten when an action is parsed.

    Returns:
        ``(action_name, action_input, history, task)``:
        ``("COMPLETE", None, ...)`` for an ``action: COMPLETE`` line, the
        parsed name/input for any other action line, or
        ``("MAIN", None, ...)`` when the reply contains no action line.
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:"],
        max_tokens=2048,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    for line in resp.strip().split("\n"):
        if line == "":
            continue
        if line.startswith("action: COMPLETE"):
            print("COMPLETE called")
            return "COMPLETE", None, history, task
        if line.startswith("action: "):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            history += "{}\n".format(line)
            return action_name, action_input, history, task
        # Thoughts and any other text are recorded exactly once. The
        # original fell through from the "thought: " branch into a trailing
        # ``else`` and appended thought lines twice.
        history += "{}\n".format(line)

    # Check the flag itself; the original tested the literal string
    # "VERBOSE", which is always true.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task
|
|
|
|
def call_set_task(purpose, task, history, action_input):
    """Ask the model for an updated task and note the change in the history.

    Args:
        purpose: Overall purpose passed to the prompt.
        task: Current task passed to the prompt.
        history: History text; an observation naming the new task is appended.
        action_input: Unused.

    Returns:
        ``("MAIN", None, history, task)`` with the new task and extended
        history.
    """
    generated = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    task = generated.strip("\n")
    history = history + "observation: task has been updated to: {}\n".format(task)
    return "MAIN", None, history, task
|
|
|
|
|
|
| |
def search_all(url):
    """Placeholder: ignores *url* and returns an empty string."""
    return ""
|
|
|
|
|
|
def find_all(purpose, task, history, url):
    """Fetch *url*, extract its text, and return a compressed observation.

    Args:
        purpose: Overall purpose, forwarded to ``compress_data``.
        task: Current task; replaced by ``"complete?"`` after a successful
            scrape, otherwise returned unchanged.
        history: History text. Replaced by the scrape observation on
            success; extended with a usage hint when the URL is missing or
            the scrape fails.
        url: Address of the page to fetch.

    Returns:
        ``("MAIN", None, history, task)`` in every case.
    """
    retry_observation = (
        "observation: I need to trigger a search using the following syntax:\n"
        "action: SCRAPE_WEBSITE action_input=URL\n"
    )
    print(url)
    print(f"trying URL:: {url}")

    if not url:
        return "MAIN", None, history + retry_observation, task

    try:
        # Bound the wait so an unresponsive host cannot hang the agent loop.
        source = requests.get(url, timeout=30)
        soup = bs4.BeautifulSoup(source.content, 'lxml')

        # Title details are debug output only; a page without <title> must
        # not abort the scrape.
        title = soup.title
        print(title)
        if title is not None:
            print(title.name)
            print(title.string)
            print(title.parent.name)

        print([tag.name for tag in soup.find_all()])
        rawp = f'RAW TEXT RETURNED: {soup.text}'

        q = ("a", "p", "span", "content", "article")
        out = [
            [{
                p.name: p.string,
                "parent": p.parent.name,
                "previous": p.previous,
                "first-child": [b.name for b in p.children],
                "content": p,
            }]
            for p in soup.find_all(q)
        ]

        rl = len(rawp)
        print(rl)

        rawp = compress_data(rl, purpose, task, rawp)
        print(rawp)
        print(f'out:: {out}')
        history = "observation: the search results are:\n {}\n".format(rawp)
        task = "complete?"
        return "MAIN", None, history, task
    except Exception as e:
        # Boundary handler: network, parsing and model failures all turn
        # into a hint for the next step rather than a crash.
        print(e)
        history += retry_observation
        return "MAIN", None, history, task
|
|
| |
|
|
# Dispatch table used by run_action: action name -> handler.
# Both web actions currently resolve to the same page scraper.
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}
|
|
|
|
def run_action(purpose, task, history, action_name, action_input):
    """Execute one named action and return the follow-up state.

    Args:
        purpose: Overall purpose, forwarded to the handler.
        task: Current task, forwarded to the handler.
        history: History text; compressed first when it has more than
            ``MAX_HISTORY`` lines.
        action_name: Name of the action to run.
        action_input: Argument forwarded to the handler.

    Returns:
        ``(action_name, action_input, history, task)`` for the next step.
        ``"COMPLETE"`` is echoed back untouched; an unknown action yields
        ``"MAIN"`` with an error observation appended to the history.
    """
    if action_name == "COMPLETE":
        print("Complete - Exiting")
        return "COMPLETE", None, history, task

    line_count = len(history.split("\n"))
    if line_count > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)

    handler = NAME_TO_FUNC.get(action_name)
    if handler is None:
        history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
        return "MAIN", None, history, task

    print(f"RUN: {action_name} ACTION_INPUT: {action_input}")
    return handler(purpose, task, history, action_input)
|
|
def run(purpose, history, data=None, file=None, url=None, pdf_url=None, pdf_batch=None):
    """Drive the agent loop as a generator of UI updates.

    Args:
        purpose: The user's instruction text.
        history: Chat pairs; when non-empty they are rendered into the
            starting history via ``format_prompt``.
        data, file, url, pdf_url, pdf_batch: Accepted from the UI; not read
            by this function.

    Yields:
        ``(None, [(purpose, history)], None)`` after every action, until an
        action returns ``"COMPLETE"``.
    """
    history = format_prompt(purpose, history) if history else ""

    # The loop always opens with the search action and no input.
    action_name = "SEARCH_ENGINE"
    action_input = None
    task = "Use search engine tool to search for more information"

    while True:
        print("\n\n---")
        print("purpose:", purpose)
        print("task:", task)
        print("---")
        print("---")

        step = run_action(purpose, task, history, action_name, action_input)
        action_name, action_input, history, task = step

        yield None, [(purpose, history)], None
        if action_name == "COMPLETE":
            return None, [(purpose, history)], None
|
|
def clear_fn():
    """Return reset values: an empty prompt and a single empty chat pair."""
    empty_prompt = ""
    empty_chat = [(None, None)]
    return empty_prompt, empty_chat
|
|
|
|
# UI definition.
# NOTE(review): the source lost its indentation, so widget nesting below is
# reconstructed (Stop/Clear row placed inside the button column) - confirm
# against the intended layout.
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3>""")
    chatbot = gr.Chatbot()

    # Instruction box with the action buttons beside it.
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            button = gr.Button()
            with gr.Row():
                stop_button = gr.Button("Stop")
                clear_btn = gr.Button("Clear")

    # One tab per input source.
    with gr.Row():
        with gr.Tab("Text"):
            data = gr.Textbox(label="Input Data (paste text)", lines=6)
        with gr.Tab("File"):
            file = gr.Files(label="Input File (.pdf .txt)")
        with gr.Tab("Raw HTML"):
            url = gr.Textbox(label="URL")
        with gr.Tab("PDF URL"):
            pdf_url = gr.Textbox(label="PDF URL")
        with gr.Tab("PDF Batch"):
            pdf_batch = gr.Textbox(label="PDF Batch (comma separated)")
    e_box = gr.Textbox()

    # Event wiring: Clear resets prompt and chat, the main button starts the
    # agent generator, Stop cancels that running event.
    clear_btn.click(clear_fn, None, [prompt, chatbot])
    run_inputs = [prompt, chatbot, data, file, url, pdf_url, pdf_batch]
    run_outputs = [prompt, chatbot, e_box]
    go = button.click(run, run_inputs, run_outputs)
    stop_button.click(None, None, None, cancels=[go])

app.launch(show_api=False, share=False)