falcon-180b-demo

Runtime error

futranbg committed on Nov 5, 2023

Commit

fec6802

1 Parent(s): 6ae1c70

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from huggingface_hub import Repository, InferenceClient
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"
 BOT_NAME = "LLAMA"
 STOP_SEQUENCES = ["\nUser:", " User:", "###", "</s>"]
@@ -25,6 +26,11 @@ client = InferenceClient(
     headers={"Authorization": f"Bearer {HF_TOKEN}"},
 )
 def format_prompt(message, history, system_prompt):
   prompt = ""
   if system_prompt:
@@ -72,9 +78,23 @@ def generate(
                     yield output
             yield output
     except Exception as e:
-        raise gr.Error(f"Error while generating: {e}")
-    return output
 additional_inputs=[
     gr.Textbox("", label="Optional system prompt"),
@@ -116,7 +136,6 @@ additional_inputs=[
     )
 ]
 with gr.Blocks() as demo:
     gr.ChatInterface(

 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"
+API_URL_2 = "https://api-inference.huggingface.co/models/codellama/CodeLlama-34b-Instruct-hf"
 BOT_NAME = "LLAMA"
 STOP_SEQUENCES = ["\nUser:", " User:", "###", "</s>"]
     headers={"Authorization": f"Bearer {HF_TOKEN}"},
 )
+client2 = InferenceClient(
+    API_URL_2,
+    headers={"Authorization": f"Bearer {HF_TOKEN}"},
+)
 def format_prompt(message, history, system_prompt):
   prompt = ""
   if system_prompt:
                     yield output
             yield output
     except Exception as e:
+        raise gr.Error(f"Client 1 error while generating: {e}")
+        try:
+            stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+            output = ""
+            for response in stream:
+                output += response.token.text
+                for stop_str in STOP_SEQUENCES:
+                    if output.endswith(stop_str):
+                        output = output[:-len(stop_str)]
+#                        output = output.rstrip()
+                        yield output
+                yield output
+        except Exception as e:
+            raise gr.Error(f"Client 2 error while generating: {e}")
+    return output
 additional_inputs=[
     gr.Textbox("", label="Optional system prompt"),
     )
 ]
 with gr.Blocks() as demo:
     gr.ChatInterface(