Commit 779a991 · Parent(s): f02b2fd
Create the app
app.py ADDED
@@ -0,0 +1,147 @@
import json
import os

import gradio as gr
import requests

hf_token = os.getenv("HF_TOKEN")
api_url_7b = os.getenv("API_URL_LLAMA_7")
api_url_13b = os.getenv("API_URL_LLAMA_13")
api_url_70b = os.getenv("API_URL_LLAMA_70")

# Map the dropdown choices to their endpoint URLs so predict() can pick one.
model_urls = {
    "api_url_7b": api_url_7b,
    "api_url_13b": api_url_13b,
    "api_url_70b": api_url_70b,
}

headers = {
    "Content-Type": "application/json",
}


def predict(message,
            chatbot,
            model="api_url_70b",
            system_prompt="",
            temperature=0.9,
            max_new_tokens=256,
            top_p=0.6,
            repetition_penalty=1.0):

    # Resolve the endpoint for the selected model.
    api_url = model_urls[model]

    # Write the system prompt
    if system_prompt != "":
        input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
    else:
        input_prompt = "<s>[INST] "

    # Clamp temperature so it is never below 1e-2
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2

    top_p = float(top_p)

    # Replay the conversation history in the Llama 2 chat format.
    for interaction in chatbot:
        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s>[INST] "

    input_prompt = input_prompt + str(message) + " [/INST] "

    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
            "do_sample": True,
        },
    }

    response = requests.post(api_url, headers=headers, data=json.dumps(data),
                             auth=("hf", hf_token), stream=True)

    partial_message = ""
    for line in response.iter_lines():
        if line:  # filter out keep-alive new lines
            # Decode from bytes to string
            decoded_line = line.decode("utf-8")

            # Remove the 'data:' prefix
            if decoded_line.startswith("data:"):
                json_line = decoded_line[5:]  # exclude the first 5 characters ('data:')
            else:
                gr.Warning(f"This line does not start with 'data:': {decoded_line}")
                continue

            # Load as JSON
            try:
                json_obj = json.loads(json_line)
                if "token" in json_obj:
                    partial_message = partial_message + json_obj["token"]["text"]
                    yield partial_message
                elif "error" in json_obj:
                    yield json_obj["error"] + ". Please refresh and try again with a smaller input prompt."
                else:
                    gr.Warning(f"The key 'token' does not exist in this JSON object: {json_obj}")
            except json.JSONDecodeError:
                gr.Warning(f"This line is not valid JSON: {json_line}")
                continue
            except KeyError as e:
                gr.Warning(f"KeyError: {e} occurred for JSON object: {json_obj}")
                continue


# Placeholder UI text; replace with the Space's real copy.
title = "Llama 2 Chat"
description = "Chat with Llama 2 models served behind Hugging Face inference endpoints."
css = ""
examples = [["Hello!"]]

additional_inputs = [
    gr.Dropdown(["api_url_7b", "api_url_13b", "api_url_70b"],
                value="api_url_70b",
                label="Model",
                info="Which model to use?"),
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=4096,
        step=64,
        interactive=True,
        info="The maximum number of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.6,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]

chatbot_stream = gr.Chatbot(avatar_images=("user.png", "bot2.png"), bubble_full_width=False)

# The model dropdown reaches predict() through additional_inputs, so no
# separate model argument is passed here.
chat_interface_stream = gr.ChatInterface(predict,
                                         title=title,
                                         description=description,
                                         textbox=gr.Textbox(),
                                         chatbot=chatbot_stream,
                                         css=css,
                                         examples=examples,
                                         cache_examples=True,
                                         additional_inputs=additional_inputs)

# Gradio Demo
with gr.Blocks() as demo:
    with gr.Tab("Llama 70B"):
        chat_interface_stream.render()

# Queueing is required for token streaming; launch() serves the app.
demo.queue().launch()
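
For reference, here is a minimal standalone sketch, with made-up example values, of the prompt string that predict() assembles for one prior exchange plus a new user message. It mirrors the Llama 2 chat template used in the file above and can be run on its own to inspect the result.

# Standalone sketch (not part of the commit): reproduces the prompt that
# predict() builds. All values below are hypothetical examples.
system_prompt = "You are a helpful assistant."
history = [("Hi", "Hello! How can I help?")]  # (user, bot) pairs, as in the Chatbot
message = "Tell me a joke."

prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
for user_turn, bot_turn in history:
    prompt += f"{user_turn} [/INST] {bot_turn} </s><s>[INST] "
prompt += f"{message} [/INST] "
print(prompt)
# <s>[INST] <<SYS>>
# You are a helpful assistant.
# <</SYS>>
#
#  Hi [/INST] Hello! How can I help? </s><s>[INST] Tell me a joke. [/INST]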