Spaces:
Paused
Paused
Roger Condori
committed on
add limit in HF app.py
Browse files
app.py
CHANGED
|
@@ -77,7 +77,9 @@ def convert():
|
|
| 77 |
data_docs += f"<hr><h3 style='color:red;'>{pg}</h2><p>{txt}</p><p>{sc}</p>"
|
| 78 |
return data_docs
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
|
| 82 |
with gr.Blocks(theme=theme, css=css) as demo:
|
| 83 |
with gr.Tab("Chat"):
|
|
@@ -93,7 +95,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
|
|
| 93 |
sou = gr.HTML("")
|
| 94 |
|
| 95 |
with gr.Tab("Chat Options"):
|
| 96 |
-
max_docs = gr.inputs.Slider(1,
|
| 97 |
row_table = gr.HTML("<hr><h4> </h2>")
|
| 98 |
clear_button = gr.Button("CLEAR CHAT HISTORY", )
|
| 99 |
link_output = gr.HTML("")
|
|
@@ -105,7 +107,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
|
|
| 105 |
gr.HTML("<h3>Only models from the GGML library are accepted.</h3>")
|
| 106 |
repo_ = gr.Textbox(label="Repository" ,value="TheBloke/Llama-2-7B-Chat-GGML")
|
| 107 |
file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q2_K.bin")
|
| 108 |
-
max_tokens = gr.inputs.Slider(1,
|
| 109 |
temperature = gr.inputs.Slider(0.1, 1., default=0.2, label="Temperature", step=0.1)
|
| 110 |
top_k = gr.inputs.Slider(0.01, 1., default=0.95, label="Top K", step=0.01)
|
| 111 |
top_p = gr.inputs.Slider(0, 100, default=50, label="Top P", step=1)
|
|
@@ -118,9 +120,12 @@ with gr.Blocks(theme=theme, css=css) as demo:
|
|
| 118 |
msg.submit(predict,[msg, chatbot, max_docs],[msg, chatbot]).then(convert,[],[sou])
|
| 119 |
|
| 120 |
change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify])
|
| 121 |
-
falcon_button.click(dc.default_falcon_model, [], [model_verify])
|
| 122 |
|
|
|
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
| 126 |
demo.launch(enable_queue=True)
|
|
|
|
| 77 |
data_docs += f"<hr><h3 style='color:red;'>{pg}</h2><p>{txt}</p><p>{sc}</p>"
|
| 78 |
return data_docs
|
| 79 |
|
| 80 |
+
# Max values in generation
|
| 81 |
+
DOC_DB_LIMIT = 10
|
| 82 |
+
MAX_NEW_TOKENS = 2048
|
| 83 |
|
| 84 |
with gr.Blocks(theme=theme, css=css) as demo:
|
| 85 |
with gr.Tab("Chat"):
|
|
|
|
| 95 |
sou = gr.HTML("")
|
| 96 |
|
| 97 |
with gr.Tab("Chat Options"):
|
| 98 |
+
max_docs = gr.inputs.Slider(1, DOC_DB_LIMIT, default=3, label="Maximum querys to the DB.", step=1)
|
| 99 |
row_table = gr.HTML("<hr><h4> </h2>")
|
| 100 |
clear_button = gr.Button("CLEAR CHAT HISTORY", )
|
| 101 |
link_output = gr.HTML("")
|
|
|
|
| 107 |
gr.HTML("<h3>Only models from the GGML library are accepted.</h3>")
|
| 108 |
repo_ = gr.Textbox(label="Repository" ,value="TheBloke/Llama-2-7B-Chat-GGML")
|
| 109 |
file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q2_K.bin")
|
| 110 |
+
max_tokens = gr.inputs.Slider(1, MAX_NEW_TOKENS, default=16, label="Max new tokens", step=1)
|
| 111 |
temperature = gr.inputs.Slider(0.1, 1., default=0.2, label="Temperature", step=0.1)
|
| 112 |
top_k = gr.inputs.Slider(0.01, 1., default=0.95, label="Top K", step=0.01)
|
| 113 |
top_p = gr.inputs.Slider(0, 100, default=50, label="Top P", step=1)
|
|
|
|
| 120 |
msg.submit(predict,[msg, chatbot, max_docs],[msg, chatbot]).then(convert,[],[sou])
|
| 121 |
|
| 122 |
change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify])
|
|
|
|
| 123 |
|
| 124 |
+
falcon_button.click(dc.default_falcon_model, [], [model_verify])
|
| 125 |
|
| 126 |
+
# limit in HF, no need to set it
|
| 127 |
+
if "SET_LIMIT" == os.getenv("DEMO"):
|
| 128 |
+
DOC_DB_LIMIT = 4
|
| 129 |
+
MAX_NEW_TOKENS = 32
|
| 130 |
+
|
| 131 |
demo.launch(enable_queue=True)
|