Omnibus committed on
Commit
135fc23
·
1 Parent(s): 2851e9c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +519 -0
app.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Hugging Face's logo
2
+ Hugging Face
3
+ Search models, datasets, users...
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Docs
8
+ Solutions
9
+ Pricing
10
+
11
+
12
+
13
+ Spaces:
14
+
15
+ Omnibus
16
+ /
17
+ Find-it-Auto
18
+
19
+
20
+ like
21
+ 0
22
+
23
+ Logs
24
+ App
25
+ Files
26
+ Community
27
+ Settings
28
+ Find-it-Auto
29
+ /
30
+ app.py
31
+ Omnibus's picture
32
+ Omnibus
33
+ Update app.py
34
+ f2ec3b4
35
+ 1 day ago
36
+ raw
37
+ history
38
+ blame
39
+ edit
40
+ delete
41
+ 15.2 kB
42
+ import gradio as gr
43
+ import urllib.request
44
+ import requests
45
+ import bs4
46
+ import lxml
47
+ import os
48
+ #import subprocess
49
+ from huggingface_hub import InferenceClient,HfApi
50
+ import random
51
+ import json
52
+ import datetime
53
+ #from query import tasks
54
+ from prompts import (
55
+ FINDER,
56
+ COMPRESS_HISTORY_PROMPT,
57
+ COMPRESS_DATA_PROMPT,
58
+ COMPRESS_DATA_PROMPT_SMALL,
59
+ LOG_PROMPT,
60
+ LOG_RESPONSE,
61
+ PREFIX,
62
+ TASK_PROMPT,
63
+ )
64
# Hub client used by the SEARCH tool to list models.
api=HfApi()


# Inference endpoint the agent prompts on every step.
client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)
71
+
72
def parse_action(string: str):
    """Parse one agent output line of the form 'action: NAME action_input=VALUE'.

    Returns:
        (action_name, action_input) — action_input is None when the line
        carries no 'action_input=' clause.

    Raises:
        ValueError: if the line does not start with 'action:'.
    """
    print("PARSING:")
    print(string)
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if not string.startswith("action:"):
        raise ValueError(f"not an action line: {string!r}")
    # Derive offsets from the markers themselves instead of magic 8 / 13.
    head = len("action: ")
    marker = "action_input="
    idx = string.find(marker)
    print(idx)
    if idx == -1:
        # No explicit input -- everything after 'action: ' is the name.
        print("idx == -1")
        print(string[head:])
        return string[head:], None
    # Name is the text between the prefix and the marker (minus the
    # separating space); the input has surrounding quotes stripped.
    action_name = string[head : idx - 1]
    action_input = string[idx + len(marker):].strip("'").strip('"')
    print("last return:")
    print(action_name)
    print(action_input)
    return action_name, action_input
87
+
88
+
89
+
90
# Print prompts, responses, and intermediate agent state to stdout.
VERBOSE = True
# Compress the running history once it exceeds this many lines.
MAX_HISTORY = 100
# Size threshold above which search/scrape results are chunk-compressed.
MAX_DATA = 1000
93
+
94
def format_prompt(message, history):
    """Render the chat history plus the new message in Mixtral-instruct format."""
    pieces = ["<s>"]
    for user_turn, assistant_turn in history:
        pieces.append(f"[INST] {user_turn} [/INST]")
        pieces.append(f" {assistant_turn}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
101
+
102
def call_search(purpose, task, history, action_input):
    """Tool: search the Hugging Face Hub for models matching `action_input`.

    Returns the standard agent tuple (next_action, action_input, history, task).
    On failure (or a missing query) the agent is steered back to MAIN /
    UPDATE-TASK with a usage-hint observation.
    """
    return_list = []
    print(action_input)
    print("trying")
    try:
        if action_input is not None and action_input != "":
            # Bug fix: str.strip returns a new string; the original
            # discarded the result, so quotes were never removed.
            action_input = action_input.strip('"')
            model_list = api.list_models(filter=f"{action_input}")
            this_obj = list(model_list)
            print(f'THIS_OBJ :: {this_obj[0]}')
            for model in this_obj:
                return_list.append({
                    "id": model.id,
                    "author": model.author,
                    "created_at": model.created_at,
                    "last_modified": model.last_modified,
                    "private": model.private,
                    "gated": model.gated,
                    "disabled": model.disabled,
                    "downloads": model.downloads,
                    "likes": model.likes,
                    "library_name": model.library_name,
                    "tags": model.tags,
                    "pipeline_tag": model.pipeline_tag,
                })
            rl = len(return_list)
            print(rl)
            # Rough size proxy: count separators in the serialized results.
            c = sum(1 for ch in str(return_list) if ch in (" ", ","))
            print(c)
            if rl > MAX_DATA:
                print("compressing...")
                return_list = compress_data(rl, purpose, task, return_list)
            history = "observation: the search results are:\n {}\n".format(return_list)
            return "MAIN", None, history, task
        else:
            history = "observation: I need to trigger a search using the following syntax:\naction: SEARCH action_input=URL\n"
            return "UPDATE-TASK", None, history, task
    except Exception as e:
        print(e)
        history = "observation: I need to trigger a search using the following syntax:\naction: SEARCH action_input=URL\n"
        return "UPDATE-TASK", None, history, task
156
+
157
+
158
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Stream a completion from the inference client and return the full text.

    The prompt is PREFIX (timestamp/purpose filled in) followed by
    `prompt_template` formatted with `prompt_kwargs`.
    """
    timestamp = datetime.datetime.now()
    print(seed)

    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(timestamp=timestamp, purpose=purpose)
    content += prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    # Stream token-by-token and accumulate the pieces into one string.
    stream = client.text_generation(
        content, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    resp = "".join(chunk.token.text for chunk in stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
198
+
199
def compress_data(c, purpose, task, history):
    """Summarize an oversized result set in MAX_DATA-sized slices.

    Args:
        c: approximate size of `history` (item or separator count).
        purpose, task: threaded into the compression prompt.
        history: sliceable data (string or list) to compress.

    Returns:
        An 'observation: ...' string holding the final summary.
    """
    seed = random.randint(1, 1000000000)
    print(c)
    c = int(c)
    if c <= 0:
        # Robustness: the original divided by c/MAX_DATA, so c == 0 raised
        # ZeroDivisionError.
        return "observation: \n"
    divr = c / MAX_DATA
    # Number of slices: ceil(c / MAX_DATA).
    divi = int(divr) + 1 if divr != int(divr) else int(divr)
    chunk = int(c / divr)  # == MAX_DATA items per slice
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')
    out = []
    s = 0
    e = chunk
    print(f'e:: {e}')
    new_history = ""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    resp = ""
    for z in range(divi):
        print(f's:e :: {s}:{e}')
        hist = history[s:e]
        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=hist,
        )
        # Each pass folds the previous summary in as `knowledge`.
        new_history = resp
        print(resp)
        # Bug fix: `out += resp` spliced the string into the list one
        # character at a time; append keeps each summary intact.
        out.append(resp)
        e += chunk
        s += chunk
    print("final" + resp)
    history = "observation: {}\n".format(resp)
    return history
252
+
253
+
254
+
255
+
256
def compress_history(purpose, task, history):
    """Condense a long agent history into a single observation line."""
    summary = run_gpt(
        COMPRESS_HISTORY_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=512,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    return f"observation: {summary}\n"
268
+
269
+
270
def call_main(purpose, task, history, action_input):
    """Main agent step: query the model and route on its first action line.

    Returns the standard agent tuple (next_action, action_input, history, task).
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:", "action:"],
        max_tokens=512,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    lines = resp.strip().strip("\n").split("\n")
    for line in lines:
        if line == "":
            continue
        # COMPLETE must be checked before the generic action: prefix.
        if line.startswith("action: COMPLETE"):
            print("COMPLETE called")
            return "COMPLETE", None, history, task
        if line.startswith("action:"):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            history += "{}\n".format(line)
            return action_name, action_input, history, task
        # Bug fix: the original's separate `if thought:` branch plus the
        # trailing `else` appended thought lines to history twice.
        history += "{}\n".format(line)
    # Bug fix: the original tested the literal string "VERBOSE" (always
    # truthy) instead of the VERBOSE flag.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task
302
+
303
+
304
def call_set_task(purpose, task, history, action_input):
    """Ask the model to (re)write the current task, then hand back to MAIN."""
    new_task = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    ).strip("\n")
    history += "observation: task has been updated to: {}\n".format(new_task)
    return "MAIN", None, history, new_task
316
+
317
+
318
+
319
+ ###########################################################
320
def search_all(url):
    """Placeholder search hook; currently always returns an empty source string."""
    return ""
323
+
324
+
325
+
326
def find_all(purpose, task, history, url):
    """Tool: fetch `url`, scrape its text and common tags into an observation.

    Returns the standard agent tuple (next_action, action_input, history, task).
    On failure (or a missing URL) the agent gets a usage-hint observation.
    """
    print(url)
    print(f"trying URL:: {url}")
    try:
        if url is not None and url != "":
            out = []
            source = requests.get(url)
            soup = bs4.BeautifulSoup(source.content, 'lxml')
            # Debug peek at the page structure.
            print(soup.title)
            print(soup.title.name)
            print(soup.title.string)
            print(soup.title.parent.name)
            print([tag.name for tag in soup.find_all()])
            out.append(f'RAW TEXT RETURNED: {soup.text}')
            for p in soup.find_all(("a", "p", "span", "content", "article")):
                out.append([{p.name: p.string,
                             "parent": p.parent.name,
                             "previous": p.previous,
                             "first-child": [b.name for b in p.children],
                             "content": p}])
            out = str(out)
            rl = len(out)
            print(f'rl:: {rl}')
            # Rough size proxy: separators/newlines in the serialized scrape.
            c = sum(1 for ch in out if ch in (" ", ",", "\n"))
            print(f'c:: {c}')
            if rl > MAX_DATA:
                print("compressing...")
                # Bug fix: the original computed the compressed summary into
                # `rawp` but then put the uncompressed `out` in the
                # observation, discarding the compression entirely.
                out = compress_data(c, purpose, task, out)
            print(f'out:: {out}')
            history += "observation: the search results are:\n {}\n".format(out)
            task = "complete?"
            return "MAIN", None, history, task
        else:
            history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
            return "MAIN", None, history, task
    except Exception as e:
        print(e)
        history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
        return "MAIN", None, history, task
382
+
383
+
384
def find_it(url, q=None, num=None):
    """Scrape `url` and collect every `q` tag, optionally reading attribute `num`.

    Args:
        url: page to fetch.
        q: tag name to collect (e.g. 'a', 'p').
        num: optional attribute name whose value is recorded per tag.

    Returns:
        (out, out_l): `out` is a list of per-tag detail dicts; `out_l` holds
        each tag's string, falling back to the `num` attribute value.
    """
    out = []
    out_l = []
    z = ""
    source = urllib.request.urlopen(url).read()
    soup = bs4.BeautifulSoup(source, 'lxml')

    for p in soup.find_all(f'{q}'):
        # Bug fix: the original tested `num != ""`, so the default num=None
        # looked up the literal attribute name 'None' on every tag.
        if num:
            z = p.get(f'{num}')
        out.append([{q: p.string,
                     "additional": z,
                     "parent": p.parent.name,
                     "previous": [b for b in p.previous],
                     "first-child": [b.name for b in p.children],
                     "content": p}])
        if p.string is not None:
            out_l.append(p.string)
        else:
            out_l.append(z)
        print(p.parent.name)
    # NOTE: the original loop variable shadowed the `url` parameter here.
    for link in soup.find_all('a'):
        print(link.get('href'))

    return out, out_l
415
+
416
def find_it2(url):
    """Fetch `url` and return the text of all of its anchor tags.

    Returns the joined link text on success, or the exception object on
    failure (preserving the original best-effort contract).
    """
    try:
        # Bug fix: the original passed bogus kwargs (a1/q2/q3) to
        # requests.get, raising TypeError before any request was made; the
        # call is also moved inside the try so network errors are handled.
        response = requests.get(url)
        response.raise_for_status()
        # Bug fix: BeautifulSoup was referenced unqualified but only
        # imported as bs4.BeautifulSoup in this module.
        soup = bs4.BeautifulSoup(response.content, 'lxml')
        # Bug fix: 'URL Links:\n' was used as the join *separator*; it was
        # clearly intended as a header over newline-joined link texts.
        out = 'URL Links:\n' + '\n'.join(p.text for p in soup.find_all('a'))
        return out
    except Exception as e:
        print(e)
        return e
426
+ #################################
427
+
428
# Dispatch table mapping an action name (parsed by parse_action) to the
# tool that handles it. Both search-style actions currently share the same
# scraper implementation (find_all).
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}
434
+
435
+
436
def run_action(purpose, task, history, action_name, action_input):
    """Dispatch one agent step to the tool registered under `action_name`.

    Compresses the history first when it grows past MAX_HISTORY lines.
    Returns the standard agent tuple (next_action, action_input, history, task).
    """
    if action_name == "COMPLETE":
        print("Complete - Exiting")
        return "COMPLETE", None, history, task

    # compress the history when it is long
    if len(history.split("\n")) > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)
    # (The original re-asserted membership inside this branch -- redundant.)
    if action_name in NAME_TO_FUNC:
        print(f"RUN: {action_name} ACTION_INPUT: {action_input}")
        return NAME_TO_FUNC[action_name](purpose, task, history, action_input)
    # Bug fix: the hint listed WEBSITE_SCRAPE, which is not a registered
    # tool; the dispatch table's key is SCRAPE_WEBSITE.
    history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
    return "MAIN", None, history, task
457
+
458
def run(purpose,history,data=None,file=None,url=None,pdf_url=None,pdf_batch=None):
    """Top-level agent loop driven by the Gradio button.

    Repeatedly dispatches actions via run_action until COMPLETE, yielding
    the growing history string after each step.

    NOTE(review): the incoming `history` argument and the data/file/url/
    pdf_* inputs are currently ignored -- history is reset to "" below,
    even though the UI wiring passes all of them. Confirm intent.
    """
    task=None
    # NOTE(review): this discards the chatbot history passed in by Gradio.
    history = ""
    #if not history:
    #    history = []
    # First step always asks the model to set a task, since task is None.
    action_name = "UPDATE-TASK" if task is None else "MAIN"
    action_input = None
    while True:
        print("")
        print("")
        print("---")
        print("purpose:", purpose)
        print("task:", task)
        print("---")
        #print(history)
        print("---")

        action_name, action_input, history, task = run_action(
            purpose,
            task,
            history,
            action_name,
            action_input,
        )
        # Stream the updated history back to the UI after every step.
        yield history
        if action_name == "COMPLETE":
            return history
485
+
486
+
487
+
488
+
489
def clear_fn():
    """Clear-button handler: reset the prompt box and chatbot history.

    Bug fix: clear_fn was wired to the Clear button below but never
    defined, so the script raised NameError at startup.
    """
    return "", []


# Gradio UI: prompt + chatbot on top, input tabs below, Go/Stop/Clear controls.
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3>""")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            button = gr.Button()

    with gr.Row():
        stop_button = gr.Button("Stop")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        with gr.Tab("Text"):
            data = gr.Textbox(label="Input Data (paste text)", lines=6)
        with gr.Tab("File"):
            file = gr.Files(label="Input File (.pdf .txt)")
        with gr.Tab("Raw HTML"):
            url = gr.Textbox(label="URL")
        with gr.Tab("PDF URL"):
            pdf_url = gr.Textbox(label="PDF URL")
        with gr.Tab("PDF Batch"):
            pdf_batch = gr.Textbox(label="PDF Batch (comma separated)")
    e_box = gr.Textbox()
    clear_btn.click(clear_fn, None, [prompt, chatbot])
    # NOTE(review): run yields a single history string but three outputs are
    # wired here -- confirm against run's yield shape.
    go = button.click(run, [prompt, chatbot, data, file, url, pdf_url, pdf_batch], [prompt, chatbot, e_box])
    stop_button.click(None, None, None, cancels=[go])
app.launch(server_port=7860, show_api=False)