Update demo/app.py
demo/app.py (+67 -80)
@@ -4,6 +4,7 @@ import logging
 import sys
 import gradio as gr
 import torch
+import gc
 from app_modules.utils import *
 from app_modules.presets import *
 from app_modules.overwrites import *
@@ -15,53 +16,48 @@ logging.basicConfig(
 
 base_model = "decapoda-research/llama-7b-hf"
 adapter_model = "/home/user/app/checkpoint-100"
-tokenizer, model, device = load_tokenizer_and_model(base_model, adapter_model)
-
-
-def predict(
-    text,
-    chatbot,
-    history,
-    top_p,
-    temperature,
-    max_length_tokens,
-    max_context_length_tokens,
-):
-    if text == "":
-        yield chatbot, history, "Empty context."
+tokenizer,model,device = load_tokenizer_and_model(base_model,adapter_model)
+
+total_count = 0
+def predict(text,
+            chatbot,
+            history,
+            top_p,
+            temperature,
+            max_length_tokens,
+            max_context_length_tokens,):
+    if text=="":
+        yield chatbot,history,"Empty context."
+        return
+    try:
+        model
+    except:
+        yield [[text,"No Model Found"]],[],"No Model Found"
         return
 
-    inputs = generate_prompt_with_history(
-        text, history, tokenizer, max_length=max_context_length_tokens
-    )
+    inputs = generate_prompt_with_history(text,history,tokenizer,max_length=max_context_length_tokens)
     if inputs is None:
-        yield chatbot, history, "Input too long."
-        return
+        yield chatbot,history,"Input too long."
+        return
     else:
-        prompt, inputs = inputs
+        prompt,inputs=inputs
         begin_length = len(prompt)
-    input_ids = inputs["input_ids"][:, -max_context_length_tokens:].to(device)
+    input_ids = inputs["input_ids"][:,-max_context_length_tokens:].to(device)
     torch.cuda.empty_cache()
-
+    global total_count
+    total_count += 1
+    print(total_count)
+    if total_count % 50 == 0 :
+        os.system("nvidia-smi")
     with torch.no_grad():
-        for x in greedy_search(
-            input_ids,
-            model,
-            tokenizer,
-            stop_words=["[|Human|]", "[|AI|]"],
-            max_length=max_length_tokens,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            if is_stop_word_or_prefix(x, ["[|Human|]", "[|AI|]"]) is False:
+        for x in greedy_search(input_ids,model,tokenizer,stop_words=["[|Human|]", "[|AI|]"],max_length=max_length_tokens,temperature=temperature,top_p=top_p):
+            if is_stop_word_or_prefix(x,["[|Human|]", "[|AI|]"]) is False:
                 if "[|Human|]" in x:
-                    x = x[: x.index("[|Human|]")].strip()
+                    x = x[:x.index("[|Human|]")].strip()
                 if "[|AI|]" in x:
-                    x = x[: x.index("[|AI|]")].strip()
-                x = x.strip()
-                a, b = [[y[0], convert_to_markdown(y[1])] for y in history] + [
-                    [text, convert_to_markdown(x)]
-                ], history + [[text, x]]
+                    x = x[:x.index("[|AI|]")].strip()
+                x = x.strip()
+                a, b= [[y[0],convert_to_markdown(y[1])] for y in history]+[[text, convert_to_markdown(x)]],history + [[text,x]]
                 yield a, b, "Generating..."
             if shared_state.interrupted:
                 shared_state.recover()
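Note: the streaming loop above yields each partially decoded string x back to the UI, and only shows a chunk once is_stop_word_or_prefix(x, ...) returns False, so the interface never flashes a half-emitted "[|Human|]" or "[|AI|]" turn marker. The helper comes from app_modules.utils; the following is a minimal sketch of its likely behavior (a hypothetical reimplementation for illustration, not the repository's code):

def is_stop_word_or_prefix(s, stop_words):
    # True when s ends with a stop word, or with any proper prefix of one,
    # i.e. the stream may be midway through emitting a stop word.
    for stop_word in stop_words:
        if s.endswith(stop_word):
            return True
        for i in range(1, len(stop_word)):
            if s.endswith(stop_word[:i]):
                return True
    return False

Once a full marker does appear mid-string, the loop body trims the text at x.index("[|Human|]") or x.index("[|AI|]") before yielding it.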
@@ -70,40 +66,33 @@ def predict(
             return
     except:
         pass
+    del input_ids
+    gc.collect()
     torch.cuda.empty_cache()
-    print(text)
-    print(x)
-    print("=" * 80)
+    #print(text)
+    #print(x)
+    #print("="*80)
     try:
-        yield a, b, "Generate: Success"
+        yield a,b,"Generate: Success"
     except:
         pass
-
-
+
 def retry(
-    text,
-    chatbot,
-    history,
-    top_p,
-    temperature,
-    max_length_tokens,
-    max_context_length_tokens,
-):
-    logging.info("Retry...")
-    if len(history) == 0:
-        yield chatbot, history, "Empty context."
-        return
-    chatbot.pop()
-    inputs = history.pop()[0]
-    for x in predict(
-        inputs,
+        text,
         chatbot,
         history,
         top_p,
         temperature,
         max_length_tokens,
         max_context_length_tokens,
-    ):
+):
+    logging.info("Retry...")
+    if len(history) == 0:
+        yield chatbot, history, f"Empty context"
+        return
+    chatbot.pop()
+    inputs = history.pop()[0]
+    for x in predict(inputs,chatbot,history,top_p,temperature,max_length_tokens,max_context_length_tokens):
         yield x
 
 
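Note: the cleanup added after generation (del input_ids, gc.collect(), torch.cuda.empty_cache()) is the standard three-step CUDA hygiene sequence: drop the last Python reference, let the garbage collector reclaim the tensor, then hand the freed blocks in PyTorch's caching allocator back to the driver, which is what makes the memory show up as free in nvidia-smi (polled every 50 calls above). A standalone sketch of the pattern:

import gc
import torch

def one_generation_pass():
    if torch.cuda.is_available():
        input_ids = torch.zeros(1, 1024, dtype=torch.long, device="cuda")
        # ... run the model ...
        del input_ids              # drop the last reference explicitly
    gc.collect()                   # reclaim unreachable Python objects
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # release cached blocks to the driver

one_generation_pass()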
@@ -132,13 +121,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                     submitBtn = gr.Button("Send")
                 with gr.Column(min_width=70, scale=1):
                     cancelBtn = gr.Button("Stop")
-
             with gr.Row(scale=1):
                 emptyBtn = gr.Button(
                     "🧹 New Conversation",
                 )
                 retryBtn = gr.Button("🔄 Regenerate")
-                delLastBtn = gr.Button("🗑️ Remove Last Turn")
+                delLastBtn = gr.Button("🗑️ Remove Last Turn")
         with gr.Column():
             with gr.Column(min_width=50, scale=1):
                 with gr.Tab(label="Parameter Setting"):
@@ -162,7 +150,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                     max_length_tokens = gr.Slider(
                         minimum=0,
                         maximum=512,
-                        value=
+                        value=256,
                         step=8,
                         interactive=True,
                         label="Max Generation Tokens",
@@ -206,20 +194,18 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
         show_progress=True,
     )
 
-    reset_args = dict(
-        fn=reset_textbox, inputs=[], outputs=[user_input, status_display])
+    reset_args = dict(
+        fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
+    )
+
     # Chatbot
-    cancelBtn.click(cancel_outputing, [], [status_display])
     transfer_input_args = dict(
-        fn=transfer_input,
-        inputs=[user_input],
-        outputs=[user_question, user_input, submitBtn, cancelBtn],
-        show_progress=True,
+        fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
     )
 
-    user_input.submit(**transfer_input_args).then(**predict_args)
+    predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
 
-    submitBtn.click(**transfer_input_args).then(**predict_args)
+    predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)
 
     emptyBtn.click(
         reset_state,
@@ -228,7 +214,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
     )
     emptyBtn.click(**reset_args)
 
-    retryBtn.click(**retry_args)
+    predict_event3 = retryBtn.click(**retry_args)
 
     delLastBtn.click(
         delete_last_conversation,
@@ -236,11 +222,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
         [chatbot, history, status_display],
         show_progress=True,
     )
-
+    cancelBtn.click(
+        cancel_outputing, [], [status_display],
+        cancels=[
+            predict_event1,predict_event2,predict_event3
+        ]
+    )
 demo.title = "Baize"
 
-
-reload_javascript()
-demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
-    share=False, favicon_path="/home/user/app/demo/assets/favicon.ico", inbrowser=True
-)
+demo.queue(concurrency_count=1).launch()
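Note: this final rewiring is what makes the Stop button actually interrupt generation. Each generator-backed event (textbox submit, Send, Regenerate) now returns a handle (predict_event1 through predict_event3), and passing those handles to cancels= on cancelBtn.click tells Gradio to abort the in-flight jobs in addition to running cancel_outputing. A minimal self-contained example of the same pattern, assuming Gradio 3.x (the concurrency_count argument to queue() implies that series):

import time
import gradio as gr

def slow_count():
    # Generator event: streams one update per second until cancelled.
    for i in range(60):
        time.sleep(1)
        yield str(i)

with gr.Blocks() as demo:
    out = gr.Textbox(label="Output")
    start = gr.Button("Start")
    stop = gr.Button("Stop")
    start_event = start.click(slow_count, inputs=[], outputs=[out])  # keep the handle
    stop.click(lambda: "stopped", inputs=[], outputs=[out], cancels=[start_event])

demo.queue(concurrency_count=1).launch()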