Update app.py

app.py CHANGED
```diff
@@ -77,14 +77,21 @@ hf_hub_download(
     local_dir="./models"
 )
 
-llm = None
-llm_model = None
+# Initialize LLM outside the respond function
+llm = Llama(
+    model_path="models/Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
+    flash_attn=True,
+    n_gpu_layers=81,
+    n_batch=1024,
+    n_ctx=32768,
+)
+
+provider = LlamaCppPythonProvider(llm)
 
 @spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],
-    model,
     system_message,
     max_tokens,
     temperature,
```
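This hunk replaces lazy, per-request model loading with a single module-level initialization: the GGUF weights now load once at startup, `provider` becomes a module global, and `respond` loses its now-unused `model` parameter. Below is a sketch of the resulting load-once preamble with the imports spelled out; the `hf_hub_download` arguments other than `local_dir` are hypothetical, since the diff only shows the tail of that call.

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Hypothetical repo_id/filename; only local_dir appears in the diff.
hf_hub_download(
    repo_id="some-org/Mistral-Nemo-Instruct-2407-GGUF",
    filename="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
    local_dir="./models",
)

# Initialize LLM outside the respond function: runs once at import time,
# not on the first chat request.
llm = Llama(
    model_path="models/Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
    flash_attn=True,
    n_gpu_layers=81,   # offload all layers to the GPU
    n_batch=1024,
    n_ctx=32768,       # 32k context window
)
provider = LlamaCppPythonProvider(llm)
```

One trade-off worth noting: `@spaces.GPU(duration=120)` attaches the GPU per call on ZeroGPU hardware, while this `Llama(...)` call now runs at import time, so the load-once pattern assumes the hardware visible at startup can host the offloaded layers.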
```diff
@@ -92,25 +99,8 @@ def respond(
     top_k,
     repeat_penalty,
 ):
-
-
     chat_template = MessagesFormatterType.MISTRAL
 
-    global llm
-    global llm_model
-
-    if llm is None or llm_model != model:
-        llm = Llama(
-            model_path=f"models/{model}",
-            flash_attn=True,
-            n_gpu_layers=81,
-            n_batch=1024,
-            n_ctx=32768,
-        )
-        llm_model = model
-
-    provider = LlamaCppPythonProvider(llm)
-
     agent = LlamaCppAgent(
         provider,
         system_prompt=f"{system_message}",
```
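The second hunk deletes the per-request lazy loader: `respond` no longer declares `global llm`, no longer rebuilds the `Llama` instance when the dropdown selection changes, and simply closes over the module-level `provider`. For context, here is a sketch of how the remaining slider parameters presumably feed the agent, following the stock llama-cpp-agent Space template this file resembles; the `top_p` parameter and everything past the three visible `LlamaCppAgent` lines are assumptions, since the diff hides those lines as unchanged context.

```python
# Presumed continuation of respond() after the deletion (sketch; not shown
# in full by the diff). chat_template, provider, and the slider arguments
# come from the surrounding code; top_p is inferred from a hidden line.
agent = LlamaCppAgent(
    provider,
    system_prompt=f"{system_message}",
    predefined_messages_formatter_type=chat_template,
)

settings = provider.get_provider_default_settings()
settings.temperature = temperature
settings.top_p = top_p            # assumed hidden parameter
settings.top_k = top_k
settings.max_tokens = max_tokens
settings.repeat_penalty = repeat_penalty
settings.stream = True
```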
```diff
@@ -163,12 +153,6 @@ description = """<p><center>
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Dropdown([
-                'Mistral-Nemo-Instruct-2407.Q5_K_M.gguf'
-            ],
-            value="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
-            label="Model"
-        ),
         gr.Textbox(value="You are a helpful assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
```
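With the model `Dropdown` gone, the `additional_inputs` list must stay positionally aligned with the trimmed `respond` signature, since `gr.ChatInterface` passes these widgets to the function in order after `(message, history)`. A minimal, self-contained illustration of that wiring follows; the stub `respond` here is illustrative, not the app's real function.

```python
import gradio as gr

def respond(message, history, system_message, max_tokens, temperature):
    # Illustrative stub: the first extra widget now feeds system_message,
    # where the removed Dropdown's model name used to arrive.
    return f"(T={temperature}, max={max_tokens}) {system_message}: {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```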