Spaces:

FlameF0X
/

lfm2

Running

FlameF0X committed on Oct 10

Commit

13f0e11

verified ·

1 Parent(s): 0d4961f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,8 +24,7 @@ def load_model(model_key):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        device_map=None,  # Disable meta/offload shenanigans
     ).to(device)
     model_cache[model_key] = (tokenizer, model)
@@ -36,13 +35,12 @@ def chat_with_model(message, history, model_choice):
     tokenizer, model = load_model(model_choice)
     device = model.device
-    # Convert the Gradio message history into a string prompt
     prompt = ""
     for msg in history:
-        if msg["role"] == "user":
-            prompt += f"User: {msg['content']}\n"
-        elif msg["role"] == "assistant":
-            prompt += f"Assistant: {msg['content']}\n"
     prompt += f"User: {message}\nAssistant:"
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
@@ -60,15 +58,19 @@ def chat_with_model(message, history, model_choice):
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-    partial = ""
     for new_text in streamer:
-        partial += new_text
-        yield partial
 def create_demo():
     with gr.Blocks(title="LiquidAI Chat Playground") as demo:
-        gr.Markdown("## 💧 LiquidAI Chat Interface")
         model_choice = gr.Dropdown(
             label="Select Model",

     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
+        dtype=torch.float16 if device == "cuda" else torch.float32,
     ).to(device)
     model_cache[model_key] = (tokenizer, model)
     tokenizer, model = load_model(model_choice)
     device = model.device
+    # Build the prompt from previous conversation
     prompt = ""
     for msg in history:
+        role = msg["role"]
+        content = msg["content"]
+        prompt += f"{role.capitalize()}: {content}\n"
     prompt += f"User: {message}\nAssistant:"
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
+    partial_text = ""
     for new_text in streamer:
+        partial_text += new_text
+        # Yield full chat including this updated assistant message
+        yield history + [
+            {"role": "user", "content": message},
+            {"role": "assistant", "content": partial_text},
+        ]
 def create_demo():
     with gr.Blocks(title="LiquidAI Chat Playground") as demo:
+        gr.Markdown("## 💧 LiquidAI Chat Playground")
         model_choice = gr.Dropdown(
             label="Select Model",