Spaces:

Elijahbodden
/

llama.cpp

Runtime error

App Files Files Community

Elijahbodden committed on May 28, 2024

Commit

568fb1d

verified ·

1 Parent(s): f79086f

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -19

app.py CHANGED Viewed

@@ -85,28 +85,24 @@ def respond(
         response += token
         yield response
-demo = gr.ChatInterface(
-    # train your own
-    # Dumb
-    # thank llama.cpp and unsloth/explain what this is
-    # It will make up facts and opinions that i don't hold
-    # Like if you took my brain and distilled it to be as smart as a toddler
     respond,
     additional_inputs_accordion=gr.Accordion(label="Options", open=True),
     css=".bubble-gap {gap: 6px !important}",
     theme="shivi/calm_seafoam",
-    description="""Llama 3 8b finetuned on 2.5k of my discord messages. [Train your own clone!](https://gist.github.com/Elijah-Bodden/1964bd02fcd19efef65f6e0cd92881c4)
-    Q&A:
-    Q Why is the model so fucking slow
-    A The model might be slow if it hasn't run recently or a lot of people are using it (it's running on llama.cpp on a single a very slow cpu). You can duplicate the space to get your own (free) instance with no wait times.
-    Q Why is the model so dumb
-    A Llama 3 8b is impressive, but it's still tiny. This model is basically what you'd get if you shoved my brain into a toddler's head - it's just too small to be smart
-    Q Either it just made something up or I don't know you at all
-    A Probably the former. It's prone to hallucinating facts and opinions I don't hold. Take everything it says with a big grain of salt
-    """,
-    title="EliGPT v1.3",
     additional_inputs=[
         gr.Radio(presets.keys(), label="Personality preset", info="VERY SLIGHTLY influence the model's personality [WARNING, IF YOU CHANGE THIS WHILE THERE ARE MESSAGES IN THE CHAT THE MODEL WILL BECOME VERY SLOW]", value="Default"),
         # ("The model will become slow" is bc this uncaches the prompt and prompt processing is a big part of the generation time)
@@ -118,7 +114,7 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Presence penalty", info='"Use lots of diverse words"'),
         gr.Slider(minimum=1, maximum=1024, value=1024, step=1, label="Max new tokens", info="How many words can the model generate at most?"),
     ],
-)
 if __name__ == "__main__":

         response += token
         yield response
+with gr.Blocks() as demo:
+  gr.Markdown("# EliGPT v1.3")
+  gr.Markdown("Llama 3 8b finetuned on 2.5k of my discord messages. [Train your own clone!](https://gist.github.com/Elijah-Bodden/1964bd02fcd19efef65f6e0cd92881c4)")
+  with gr.Accordion("Q&A:"):
+      gr.Markdown("""Q Why is the model so fucking slow
+        A The model might be slow if it hasn't run recently or a lot of people are using it (it's running on llama.cpp on a single a very slow cpu). You can duplicate the space to get your own (free) instance with no wait times.
+        Q Why is the model so dumb
+        A Llama 3 8b is impressive, but it's still tiny. This model is basically what you'd get if you shoved my brain into a toddler's head - it's just too small to be smart
+        Q Either it just made something up or I don't know you at all
+        A Probably the former. It's prone to hallucinating facts and opinions I don't hold. Take everything it says with a big grain of salt
+      """)
+  gr.ChatInterface(
     respond,
     additional_inputs_accordion=gr.Accordion(label="Options", open=True),
     css=".bubble-gap {gap: 6px !important}",
     theme="shivi/calm_seafoam",
     additional_inputs=[
         gr.Radio(presets.keys(), label="Personality preset", info="VERY SLIGHTLY influence the model's personality [WARNING, IF YOU CHANGE THIS WHILE THERE ARE MESSAGES IN THE CHAT THE MODEL WILL BECOME VERY SLOW]", value="Default"),
         # ("The model will become slow" is bc this uncaches the prompt and prompt processing is a big part of the generation time)
         gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Presence penalty", info='"Use lots of diverse words"'),
         gr.Slider(minimum=1, maximum=1024, value=1024, step=1, label="Max new tokens", info="How many words can the model generate at most?"),
     ],
+  )
 if __name__ == "__main__":