Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -7,13 +7,71 @@ client = InferenceClient("Satyam-Singh/LLaVa-Large-Language-Virtual-Assistant")
 
 TITLE = """<h1 align="center">LLaVa Large Language Virtual Assistant</h1>"""
 
-
-
-
-
-
-
-
+temperature_component = gr.Slider(
+    minimum=0,
+    maximum=1.0,
+    value=0.4,
+    step=0.05,
+    label="Temperature",
+    info=(
+        "Temperature controls the degree of randomness in token selection. Lower "
+        "temperatures are good for prompts that expect a true or correct response, "
+        "while higher temperatures can lead to more diverse or unexpected results. "
+    ))
+max_output_tokens_component = gr.Slider(
+    minimum=1,
+    maximum=2048,
+    value=1024,
+    step=1,
+    label="Token limit",
+    info=(
+        "Token limit determines the maximum amount of text output from one prompt. A "
+        "token is approximately four characters. The default value is 2048."
+    ))
+stop_sequences_component = gr.Textbox(
+    label="Add stop sequence",
+    value="",
+    type="text",
+    placeholder="STOP, END",
+    info=(
+        "A stop sequence is a series of characters (including spaces) that stops "
+        "response generation if the model encounters it. The sequence is not included "
+        "as part of the response. You can add up to five stop sequences."
+    ))
+top_k_component = gr.Slider(
+    minimum=1,
+    maximum=40,
+    value=32,
+    step=1,
+    label="Top-K",
+    info=(
+        "Top-k changes how the model selects tokens for output. A top-k of 1 means the "
+        "selected token is the most probable among all tokens in the model’s "
+        "vocabulary (also called greedy decoding), while a top-k of 3 means that the "
+        "next token is selected from among the 3 most probable tokens (using "
+        "temperature)."
+    ))
+top_p_component = gr.Slider(
+    minimum=0,
+    maximum=1,
+    value=1,
+    step=0.01,
+    label="Top-P",
+    info=(
+        "Top-p changes how the model selects tokens for output. Tokens are selected "
+        "from most probable to least until the sum of their probabilities equals the "
+        "top-p value. For example, if tokens A, B, and C have a probability of .3, .2, "
+        "and .1 and the top-p value is .5, then the model will select either A or B as "
+        "the next token (using temperature). "
+    ))
+
+additional_inputs = [
+    temperature_component,
+    max_output_tokens_component,
+    stop_sequences_component,
+    top_k_component,
+    top_p_component,
+]
 
 safety_settings = [
     {
@@ -36,6 +94,14 @@ safety_settings = [
 
 genai.configure(api_key=os.getenv("GOOGLE_PALM_KEY"))
 
+# Set up the model
+generation_config = {
+  "temperature": temperature_component,#0.9,
+  "top_p": top_p_component,#1,
+  "top_k": top_k_component,#1,
+  "max_output_tokens": max_output_tokens_component,#4096,
+}
+
 model = genai.GenerativeModel(model_name="gemini-pro",
                               generation_config=generation_config,
                               safety_settings=safety_settings)
@@ -99,71 +165,7 @@ convo = model.start_chat(history=[
     },
 ])
 
-temperature_component = gr.Slider(
-    minimum=0,
-    maximum=1.0,
-    value=0.4,
-    step=0.05,
-    label="Temperature",
-    info=(
-        "Temperature controls the degree of randomness in token selection. Lower "
-        "temperatures are good for prompts that expect a true or correct response, "
-        "while higher temperatures can lead to more diverse or unexpected results. "
-    ))
-max_output_tokens_component = gr.Slider(
-    minimum=1,
-    maximum=2048,
-    value=1024,
-    step=1,
-    label="Token limit",
-    info=(
-        "Token limit determines the maximum amount of text output from one prompt. A "
-        "token is approximately four characters. The default value is 2048."
-    ))
-stop_sequences_component = gr.Textbox(
-    label="Add stop sequence",
-    value="",
-    type="text",
-    placeholder="STOP, END",
-    info=(
-        "A stop sequence is a series of characters (including spaces) that stops "
-        "response generation if the model encounters it. The sequence is not included "
-        "as part of the response. You can add up to five stop sequences."
-    ))
-top_k_component = gr.Slider(
-    minimum=1,
-    maximum=40,
-    value=32,
-    step=1,
-    label="Top-K",
-    info=(
-        "Top-k changes how the model selects tokens for output. A top-k of 1 means the "
-        "selected token is the most probable among all tokens in the model’s "
-        "vocabulary (also called greedy decoding), while a top-k of 3 means that the "
-        "next token is selected from among the 3 most probable tokens (using "
-        "temperature)."
-    ))
-top_p_component = gr.Slider(
-    minimum=0,
-    maximum=1,
-    value=1,
-    step=0.01,
-    label="Top-P",
-    info=(
-        "Top-p changes how the model selects tokens for output. Tokens are selected "
-        "from most probable to least until the sum of their probabilities equals the "
-        "top-p value. For example, if tokens A, B, and C have a probability of .3, .2, "
-        "and .1 and the top-p value is .5, then the model will select either A or B as "
-        "the next token (using temperature). "
-    ))
 
-additional_inputs = [
-    temperature_component,
-    max_output_tokens_component,
-    stop_sequences_component,
-    top_k_component,
-    top_p_component,
-]
 
 def gemini_chat(message, history):
     response = convo.send_message(message)
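
Note on the change above: the added generation_config stores the Gradio component objects themselves (temperature_component, top_p_component, top_k_component, max_output_tokens_component) rather than their numeric values, and it is built at import time, before any user has touched the controls, so the Gemini client never receives usable sampling settings; this may also be related to the Space's Runtime error status. Below is a minimal sketch of one way the same controls could be wired, assuming app.py exposes gemini_chat through gr.ChatInterface with additional_inputs (that wiring is not shown in this diff): Gradio passes the current control values as extra function arguments and the config is rebuilt per request. The abbreviated component definitions and the generate_content call are illustrative only, not the Space's actual code; safety_settings and the module-level convo history are omitted for brevity.

import os

import google.generativeai as genai
import gradio as gr

genai.configure(api_key=os.getenv("GOOGLE_PALM_KEY"))

# Abbreviated versions of the components added in this commit (info texts omitted).
temperature_component = gr.Slider(0, 1.0, value=0.4, step=0.05, label="Temperature")
max_output_tokens_component = gr.Slider(1, 2048, value=1024, step=1, label="Token limit")
stop_sequences_component = gr.Textbox(label="Add stop sequence", placeholder="STOP, END")
top_k_component = gr.Slider(1, 40, value=32, step=1, label="Top-K")
top_p_component = gr.Slider(0, 1, value=1, step=0.01, label="Top-P")


def gemini_chat(message, history, temperature, max_output_tokens, stop_sequences, top_k, top_p):
    # Gradio passes the *values* of the additional inputs here, so the
    # generation config can be built from plain numbers and strings.
    generation_config = {
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "max_output_tokens": max_output_tokens,
        # Comma-separated textbox -> list of stop strings (empty list if blank).
        "stop_sequences": [s.strip() for s in stop_sequences.split(",") if s.strip()],
    }
    model = genai.GenerativeModel(
        model_name="gemini-pro",
        generation_config=generation_config,
    )
    response = model.generate_content(message)
    return response.text


demo = gr.ChatInterface(
    fn=gemini_chat,
    title="LLaVa Large Language Virtual Assistant",
    additional_inputs=[
        temperature_component,
        max_output_tokens_component,
        stop_sequences_component,
        top_k_component,
        top_p_component,
    ],
)

if __name__ == "__main__":
    demo.launch()

Rebuilding the model per request keeps the sketch simple but drops the shared convo chat history used in app.py; keeping per-user settings and conversation state together would need a different arrangement than shown here.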