Spaces:
Build error
Build error
| import gradio as gr | |
| import urllib.request | |
| import requests | |
| import bs4 | |
| import lxml | |
| import os | |
| #import subprocess | |
| from huggingface_hub import InferenceClient,HfApi | |
| import random | |
| import json | |
| import datetime | |
| import dl | |
| #from query import tasks | |
| from prompts import ( | |
| FINDER, | |
| MAIN_PROMPT, | |
| READ_FILE_CODE, | |
| COMPRESS_HISTORY_PROMPT, | |
| COMPRESS_DATA_PROMPT, | |
| COMPRESS_DATA_PROMPT_SMALL, | |
| LOG_PROMPT, | |
| LOG_RESPONSE, | |
| PREFIX, | |
| TASK_PROMPT, | |
| ) | |
# Hub API handle, created at import time; nothing else in this file's visible
# code references it.
api = HfApi()

# Shared inference client; run_gpt() streams every completion through it.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def parse_action(string: str):
    """Split a model-emitted ``action:`` line into its name and input.

    The expected shape is ``action: NAME action_input=VALUE``; the
    ``action_input=`` part is optional.

    Args:
        string: A single line that starts with ``action:``.

    Returns:
        A ``(action_name, action_input)`` tuple. ``action_input`` is ``None``
        when the line carries no ``action_input=`` marker; otherwise it is the
        text after the marker with surrounding whitespace and quotes removed.

    Raises:
        ValueError: If ``string`` does not start with ``action:``.
    """
    prefix = "action:"
    marker = "action_input="

    print("PARSING:")
    print(string)
    # Raise explicitly instead of asserting: asserts vanish under ``python -O``.
    if not string.startswith(prefix):
        raise ValueError(f"expected a line starting with {prefix!r}, got: {string!r}")

    idx = string.find(marker)
    print(idx)
    if idx == -1:
        # Strip rather than slice at a fixed offset so "action:NAME" and
        # "action: NAME  " both yield "NAME".
        action_name = string[len(prefix):].strip()
        print("idx == -1")
        print(action_name)
        return action_name, None

    action_name = string[len(prefix):idx].strip()
    # Drop whitespace first so quotes are still removed from "' value' ".
    action_input = string[idx + len(marker):].strip().strip("'").strip('"')
    print("last return:")
    print(action_name)
    print(action_input)
    return action_name, action_input
# When true, run_gpt() prints every formatted prompt and response, and
# run_action() announces history compression.
VERBOSE: bool = True

# Threshold used in two places: the number of history lines above which
# run_action() compresses the history, and the separator count above which
# find_all() compresses a scraped page.
MAX_HISTORY: int = 100

# Divisor used by compress_data() and read_code() to size the chunks of text
# sent to the model in one call.
MAX_DATA: int = 20000
def format_prompt(message, history):
    """Render a chat transcript using ``[INST] ... [/INST]`` markers.

    Args:
        message: The new user message, placed last.
        history: Iterable of ``(user_prompt, bot_response)`` pairs, oldest
            first.

    Returns:
        A single prompt string: ``<s>``, then one instruction/response pair
        per history entry, then the new message as an open instruction.
    """
    past_turns = [
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in history
    ]
    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    files,
    **prompt_kwargs,
):
    """Build a prompt, stream a completion from ``client``, and return its text.

    The prompt is ``PREFIX`` (formatted with the current timestamp, ``purpose``
    and ``files``) followed by ``prompt_template`` formatted with
    ``prompt_kwargs``.

    Args:
        prompt_template: Format string for the task-specific part of the prompt.
        stop_tokens: Sequences at which generation should stop; may be empty.
        max_tokens: Upper bound on newly generated tokens.
        seed: Sampling seed forwarded to the backend.
        purpose: Value for the ``purpose`` field of ``PREFIX``.
        files: Value for the ``files`` field of ``PREFIX``.
        **prompt_kwargs: Fields consumed by ``prompt_template``.

    Returns:
        The generated text with special tokens (for example end-of-sequence
        markers) and a trailing stop sequence removed.
    """
    timestamp = datetime.datetime.now()
    print(seed)
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )
    if stop_tokens:
        # Previously every caller supplied stop tokens but they never reached
        # the backend, so generation always ran on to max_new_tokens or EOS.
        # NOTE(review): newer huggingface_hub releases may prefer ``stop`` over
        # ``stop_sequences`` - confirm against the pinned version.
        generate_kwargs["stop_sequences"] = list(stop_tokens)

    content = PREFIX.format(
        timestamp=timestamp,
        purpose=purpose,
        files=files,
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    stream = client.text_generation(
        content,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Skip special tokens so markers such as "</s>" do not leak into the
    # history that later prompts are built from.
    resp = "".join(
        chunk.token.text
        for chunk in stream
        if not getattr(chunk.token, "special", False)
    )
    # The backend may echo the stop sequence that ended generation; drop it so
    # callers receive only the model's own content.
    for stop in stop_tokens or ():
        if stop and resp.endswith(stop):
            resp = resp[: -len(stop)]
            break

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
def compress_data(c, purpose, task, history, result, repo, space, file_name):
    """Summarize ``history`` with the model, one ``MAX_DATA``-sized slice at a time.

    Each slice is sent with ``COMPRESS_DATA_PROMPT``; the previous slice's
    response is passed along as ``knowledge`` so the summary is refined
    incrementally, and the response to the last slice is the final summary.

    Args:
        c: Separator count computed by the caller. It is only logged; chunking
            is driven by ``len(history)`` so no part of the input is skipped.
        purpose: Overall user goal, forwarded to the prompt.
        task: Current task, embedded in the compile instruction.
        history: The text to compress (sliced by character index).
        result: Unused; kept for signature compatibility.
        repo: Unused; kept for signature compatibility.
        space: Unused; kept for signature compatibility.
        file_name: Forwarded to the prompt as ``files``.

    Returns:
        ``"result: <summary>\\n"``.
    """
    seed = random.randint(1, 1000000000)
    print(c)

    chunk = MAX_DATA
    # Always make at least one pass so ``resp`` is defined for empty input.
    starts = range(0, max(len(history), 1), chunk)
    print(f'chunk:: {chunk}')
    print(f'chunks:: {len(starts)}')

    new_history = ""
    resp = ""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    for s in starts:
        e = s + chunk
        print(f's:e :: {s}:{e}')
        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            files=file_name,
            task=task,
            knowledge=new_history,
            history=history[s:e],
        ).strip('\n')
        # Carry the running summary into the next slice's prompt.
        new_history = resp
        print(resp)

    print("final" + resp)
    return "result: {}\n".format(resp)
def read_code(purpose, task, history, action_input, result, repo, space, file_name):
    """Fetch a file's contents and have the model process it in overlapping windows.

    The file is obtained through ``dl.show_file_content(repo, space,
    action_input)`` and converted to ``str``. Windows are ``MAX_DATA``
    characters long and overlap by 1000 characters. Each window is sent with
    ``READ_FILE_CODE`` and its response is appended to both ``history`` and
    ``result``.

    Args:
        purpose: Overall user goal, forwarded to the prompt.
        task: Current task, embedded in the compile instruction.
        history: Agent history string; observations are appended to it.
        action_input: Name of the file to read.
        result: Accumulated user-visible output; responses are appended to it.
        repo: Passed to ``dl.show_file_content``.
        space: Passed to ``dl.show_file_content``.
        file_name: Forwarded to the prompt as ``files``.

    Returns:
        ``("MAIN", None, history, task, result)`` where ``task`` is the compile
        instruction built here.
    """
    print("WORKING ON CODE")
    seed = random.randint(1, 1000000000)
    contents = str(dl.show_file_content(repo, space, action_input))
    rl = len(contents)
    print(f'rl:: {rl}')

    chunk = MAX_DATA
    # Consecutive windows overlap by 1000 characters; keep the step positive.
    step = max(chunk - 1000, 1)
    print(f'chunk:: {chunk}')

    # NOTE(review): the wrapped instruction replaces the caller's task in the
    # returned tuple, as before - confirm that this is intended.
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'

    s = 0
    while True:
        e = s + chunk
        print(f's:e :: {s}:{e}')
        # Slice the pristine file text; model output is never mixed back into
        # the text being windowed.
        resp = run_gpt(
            READ_FILE_CODE,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=4096,
            seed=seed,
            purpose=purpose,
            files=file_name,
            task=task,
            file_name=action_input,
            file_contents=contents[s:e],
        ).strip('\n')
        print(resp)
        history += f'observation: the new code is: {resp}'
        result += f'\n{resp}\n'
        # Stop once a window has reached the end of the file; this also gives
        # exactly one pass for empty or short files.
        if e >= rl:
            break
        s += step
    return "MAIN", None, history, task, result
def compress_history(purpose, task, history, file_name):
    """Ask the model to condense ``history`` into a single observation.

    Args:
        purpose: Overall user goal, forwarded to the prompt.
        task: Current task, forwarded to the prompt.
        history: The history text to condense.
        file_name: Forwarded to the prompt as ``files``.

    Returns:
        ``"observation: <model response>\\n"``, which replaces the old history.
    """
    summary = run_gpt(
        COMPRESS_HISTORY_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        files=file_name,
        task=task,
        history=history,
    )
    return f"observation: {summary}\n"
def call_main(purpose, task, history, action_input, result, repo, space, file_name):
    """Run one reasoning step and pick the next action from the model's reply.

    The reply to ``MAIN_PROMPT`` is scanned line by line: ``thought: `` lines
    are appended to ``history``, and the first ``action: `` line is parsed
    with ``parse_action`` and returned immediately.

    Args:
        purpose: Overall user goal, forwarded to the prompt.
        task: Current task, forwarded to the prompt.
        history: Agent history string; thoughts are appended to it.
        action_input: Ignored on entry; replaced by the parsed action input.
        result: Accumulated user-visible output, passed through unchanged.
        repo: Unused here; part of the common handler signature.
        space: Unused here; part of the common handler signature.
        file_name: Forwarded to the prompt as ``files``.

    Returns:
        ``(action_name, action_input, history, task, result)``. When the reply
        contains no action line the tuple is ``("MAIN", None, ...)`` so the
        loop asks the model again.
    """
    resp = run_gpt(
        MAIN_PROMPT,
        stop_tokens=["observation:", "task:"],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        files=file_name,
        task=task,
        history=history,
    )
    # Blank lines match neither prefix below, so they need no special case.
    for line in resp.strip().split("\n"):
        if line.startswith("thought: "):
            history += "{}\n".format(line)
        if line.startswith("action: "):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            return action_name, action_input, history, task, result

    # Test the flag itself; the string literal "VERBOSE" was always truthy.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task, result
def call_set_task(purpose, task, history, action_input, result, repo, space, file_name):
    """Ask the model for a new task and record the change in the history.

    Args:
        purpose: Overall user goal, forwarded to the prompt.
        task: Current task, forwarded to the prompt.
        history: Agent history string; an observation is appended to it.
        action_input: Unused; part of the common handler signature.
        result: Accumulated user-visible output, passed through unchanged.
        repo: Unused; part of the common handler signature.
        space: Unused; part of the common handler signature.
        file_name: Forwarded to the prompt as ``files``.

    Returns:
        ``("MAIN", None, history, new_task, result)``.
    """
    raw_task = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        files=file_name,
        task=task,
        history=history,
    )
    new_task = raw_task.strip("\n")
    history += f"observation: task has been updated to: {new_task}\n"
    return "MAIN", None, history, new_task, result
| ########################################################### | |
def search_all(url):
    """Placeholder: ignores ``url`` and always returns an empty string."""
    return ""
def find_all(purpose, task, history, url, result, repo, space, file_name):
    """Fetch ``url``, extract its text and links, and record them as an observation.

    On an HTTP 200 response the page text and every ``<a>`` element (title,
    href, string) are collected and stringified. If that string contains more
    than ``MAX_HISTORY`` separator characters it is summarized through
    ``compress_data`` and the summary is also appended to ``result``.

    Args:
        purpose: Overall user goal, forwarded to ``compress_data``.
        task: Current task; replaced after a successful fetch.
        history: Agent history string; an observation is appended to it.
        url: Address to fetch (the action input chosen by the model).
        result: Accumulated user-visible output.
        repo: Forwarded to ``compress_data``.
        space: Forwarded to ``compress_data``.
        file_name: Forwarded to ``compress_data``.

    Returns:
        ``("MAIN", None, history, task, result)`` in every case; failures are
        reported to the model through the appended observation.
    """
    print(url)
    print(f"trying URL:: {url}")
    try:
        if not url:
            history += "observation: An Error occured\nI need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
            return "MAIN", None, history, task, result

        # Bound the request so an unresponsive host cannot stall the agent
        # loop; a timeout raises and is reported by the handler below.
        source = requests.get(url, timeout=30)
        if source.status_code != 200:
            history += f"observation: That URL string returned an error: {source.status_code}, I should try a different URL string\n"
            return "MAIN", None, history, task, result

        soup = bs4.BeautifulSoup(source.content, 'lxml')
        out = [f'RAW TEXT RETURNED: {soup.text}', "HTML fragments: "]
        out.extend(
            [{"LINK TITLE": link.get('title'), "URL": link.get('href'), "STRING": link.string}]
            for link in soup.find_all("a")
        )
        out = str(out)
        print(f'rl:: {len(out)}')

        # Rough size estimate: count separator characters in the rendering.
        c = sum(1 for ch in out if ch in " ,\n/.<")
        print(f'c:: {c}')
        if c > MAX_HISTORY:
            print("compressing...")
            rawp = compress_data(c, purpose, task, out, result, repo, space, file_name)
            result += rawp
        else:
            rawp = out
        history += "observation: the search results are:\n {}\n".format(rawp)
        task = "compile report or complete?"
        return "MAIN", None, history, task, result
    except Exception as e:
        # Deliberate best-effort boundary: any failure (network, parsing,
        # model call) is logged and turned into a retry hint for the model.
        print(e)
        history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
        return "MAIN", None, history, task, result
| ################################# | |
# Dispatch table used by run_action(): maps an action name chosen by the model
# to its handler. All handlers share the signature
# (purpose, task, history, action_input, result, repo, space, file_name).
# Both search-style names route to the same page scraper.
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
    "READ_CODE": read_code,
}
def run_action(purpose, task, history, action_name, action_input, result, repo, space, file_name):
    """Execute one agent step by dispatching ``action_name`` to its handler.

    Args:
        purpose: Overall user goal.
        task: Current task.
        history: Agent history string; compressed first when it is long.
        action_name: Name of the action to run.
        action_input: Argument for the action.
        result: Accumulated user-visible output.
        repo: Forwarded to the handler.
        space: Forwarded to the handler.
        file_name: Forwarded to the handler and to history compression.

    Returns:
        ``(next_action_name, next_action_input, history, task, result)``.
    """
    # Any name containing COMPLETE ends the run without touching the history.
    if "COMPLETE" in action_name:
        print("Complete - Exiting")
        return "COMPLETE", None, history, task, result

    # Compress the history once it exceeds MAX_HISTORY lines.
    line_count = history.count("\n") + 1
    if line_count > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history, file_name)

    handler = NAME_TO_FUNC.get(action_name)
    if handler is None:
        # Unknown tool: tell the model and hand control back to MAIN.
        history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
        return "MAIN", None, history, task, result

    print(f"RUN: {action_name} ACTION_INPUT: {action_input}")
    return handler(purpose, task, history, action_input, result, repo, space, file_name)
def run(purpose, history, repo, space, f_name, file_name):
    """Drive the agent loop as a generator of chatbot updates.

    Starts with the ``MAIN`` action and keeps calling ``run_action`` with the
    state it returned until the action name is ``COMPLETE``.

    Args:
        purpose: The user's request; shown as the user side of each update.
        history: Prior chat history; stringified when non-empty, otherwise
            replaced by an empty string.
        repo: Forwarded to ``run_action``.
        space: Forwarded to ``run_action``.
        f_name: Used as the initial action input.
        file_name: Forwarded to ``run_action``.

    Yields:
        ``[(purpose, text)]`` where ``text`` is a progress message or the
        accumulated result.
    """
    yield [(purpose, "Searching...")]

    task = None
    result = ""
    history = str(history) if history else ""
    action_name, action_input = "MAIN", f_name

    while True:
        print("")
        print("")
        print("---")
        print("task:", task)
        print("---")
        print("---")
        action_name, action_input, history, task, result = run_action(
            purpose,
            task,
            history,
            action_name,
            action_input,
            result,
            repo,
            space,
            file_name,
        )
        # Show a placeholder until some result text has accumulated.
        yield [(purpose, result or "More Searching...")]
        if action_name == "COMPLETE":
            yield [(purpose, result)]
            break
# Preset prompts offered in the "Choices" dropdown of the UI.
examples = [
    "What is the current weather in Florida?",
    "Find breaking news about Texas",
    "Find the best deals on flippers for scuba diving",
    "Teach me to fly a helicopter",
]
def clear_fn():
    """Return a pair of ``None`` values, used to reset two UI components."""
    return (None, None)
# Initial value for the "Seed" slider, drawn once at import time.
rand_val = random.randint(1, 99999999999)
def check_rand(inp, val):
    """Build the "Seed" slider update for the next run.

    Args:
        inp: State of the "Random" checkbox; when truthy a fresh random seed
            is drawn.
        val: Current slider value, reused (as an ``int``) when ``inp`` is
            falsy.

    Returns:
        A ``gr.Slider`` configured with the chosen seed value.
    """
    seed_value = random.randint(1, 99999999999) if inp else int(val)
    return gr.Slider(label="Seed", minimum=1, maximum=99999999999, value=seed_value)
# NOTE(review): the original indentation of this layout was lost, so the
# nesting of rows, columns and components below is a reconstruction - confirm
# it against the intended page layout.
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B RPG</h1><h3>HF Co-pilot (development)</h3>""")

    # Repo / space / file selection.
    with gr.Group():
        with gr.Row():
            r_name = gr.Textbox(label="Repo")
            token = gr.Textbox(label="auth (optional)")
            s_btn = gr.Button("Show Spaces")
        with gr.Row():
            s_name = gr.Dropdown(label="Spaces", choices=[])
            f_name = gr.Dropdown(label="Files", choices=[])
            # No event handler is attached to this button in this file.
            l_btn = gr.Button("Load Files")

    with gr.Row():
        # Chat panel and run controls.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                show_label=False,
                show_share_button=True,
                show_copy_button=True,
                likeable=True,
                layout="panel",
                height="800px",
            )
            with gr.Row():
                with gr.Column(scale=3):
                    opt = gr.Dropdown(
                        label="Choices",
                        choices=examples,
                        allow_custom_value=True,
                        value="Start a new game",
                        interactive=True,
                    )
                with gr.Column(scale=2):
                    rand = gr.Checkbox(label="Random", value=True)
                    seed = gr.Slider(label="Seed", minimum=1, maximum=99999999999, value=rand_val)
            with gr.Row():
                button = gr.Button()
                stop_button = gr.Button("Stop")
                clear_btn = gr.Button("Clear")
            with gr.Row():
                # Hidden, non-interactive slider; not passed to any handler
                # in this file.
                tokens = gr.Slider(
                    label="Max new tokens",
                    value=2096,
                    minimum=0,
                    maximum=1048*10,
                    step=64,
                    interactive=False,
                    visible=False,
                    info="The maximum numbers of new tokens",
                )
        # File listing and space metadata.
        with gr.Column(scale=1):
            files = gr.File(file_count='directory')
            space_info_json = gr.JSON()
            file_list = gr.Textbox()
            file_frame = gr.HTML()
            json_out = gr.JSON()

    # Event wiring.
    s_btn.click(dl.show_s, [r_name, token], [s_name])
    s_name.change(dl.show_f, [r_name, s_name, token], [f_name, files, file_list, space_info_json])
    clear_btn.click(clear_fn, None, [opt, chatbot])
    # Refresh the seed slider first, then start the agent loop; the Stop
    # button cancels that chained event.
    go = button.click(check_rand, [rand, seed], seed).then(
        run,
        [opt, chatbot, r_name, s_name, f_name, file_list],
        [chatbot],
    )
    stop_button.click(None, None, None, cancels=[go])

app.queue(default_concurrency_limit=20).launch(show_api=False)