new models
app.py CHANGED
@@ -3,13 +3,18 @@ from llama_cpp import Llama
 
 # Load models
 llm = Llama.from_pretrained(
-    repo_id="Robzy/
+    repo_id="Robzy/lora_model_CodeData_120k",
     filename="unsloth.Q4_K_M.gguf",
 )
 
 llm2 = Llama.from_pretrained(
-    repo_id="Robzy/
-    filename="unsloth.
+    repo_id="Robzy/lora_model_CodeData_120k",
+    filename="unsloth.Q5_K_M.gguf",
+)
+
+llm3 = Llama.from_pretrained(
+    repo_id="Robzy/lora_model_CodeData_120k",
+    filename="unsloth.Q8_0.gguf",
 )
 
 # Define prediction functions
@@ -62,7 +67,7 @@ def predict3(message, history, model):
     messages.append({"role": "user", "content": message})
 
     response = ""
-    for chunk in
+    for chunk in llm3.create_chat_completion(
         stream=True,
         messages=messages,
     ):
@@ -75,12 +80,10 @@ def predict3(message, history, model):
 
 # Define ChatInterfaces
 io1 = gr.ChatInterface(predict, title="4-bit")
-io2 = gr.ChatInterface(predict2, title="
-io3 = gr.ChatInterface(predict3, title="
-io4 = gr.ChatInterface(predict2, title="32-bit")  # Placeholder
-
+io2 = gr.ChatInterface(predict2, title="5-bit")  # Placeholder
+io3 = gr.ChatInterface(predict3, title="8-bit")
 # Dropdown and visibility mapping
-chat_interfaces = {"4-bit": io1, "
+chat_interfaces = {"4-bit": io1, "5-bit": io2, "8-bit": io3}
 
 # Define UI
 with gr.Blocks() as demo:
@@ -88,13 +91,9 @@ with gr.Blocks() as demo:
 
     with gr.Tab("4-bit"):
         io1.render()
-    with gr.Tab("
+    with gr.Tab("5-bit"):
         io2.render()
-    with gr.Tab("
+    with gr.Tab("8-bit"):
         io3.render()
-    with gr.Tab("32-bit"):
-        io4.render()
 
-
-
 demo.launch()
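The three loader calls differ only in which GGUF quantization variant they pull from the same Hugging Face repo (Q4_K_M, Q5_K_M, Q8_0, matching the "4-bit", "5-bit" and "8-bit" tab titles). A minimal standalone sketch of the pattern, using only the arguments visible in the diff; the example prompt and the non-streaming call are illustrative, not part of the commit:

from llama_cpp import Llama

# Sketch: fetch one GGUF quantization variant from the repo named in the diff
# and run a single non-streaming chat completion.
llm = Llama.from_pretrained(
    repo_id="Robzy/lora_model_CodeData_120k",
    filename="unsloth.Q4_K_M.gguf",  # Q5_K_M.gguf and Q8_0.gguf are the other variants
)

result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Write a Python function that reverses a string."}],
)
print(result["choices"][0]["message"]["content"])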
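The diff only shows the head of the streaming call inside predict3; the loop body and the rest of the handler lie outside the changed hunks. A sketch of how such a generator-style Gradio handler typically consumes llama-cpp-python's streaming chunks, assuming pair-style history and that the extra model argument comes from an additional input (both assumptions, not shown in the diff):

def predict3(message, history, model):
    # Rebuild the chat history as OpenAI-style message dicts
    # (assumes Gradio passes history as [user, assistant] pairs).
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # 'model' would come from an extra input in the app; unused in this sketch.
    response = ""
    for chunk in llm3.create_chat_completion(
        stream=True,
        messages=messages,
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            response += delta["content"]
            yield response  # Gradio re-renders the partial reply on each yield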