Update app.py
app.py
CHANGED
@@ -1,9 +1,8 @@
 import gradio as gr
-from transformers import …
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 import torch
 from threading import Thread
 
-# Available model options
 MODEL_NAMES = {
     "LFM2-350M": "LiquidAI/LFM2-350M",
     "LFM2-700M": "LiquidAI/LFM2-700M",
@@ -12,91 +11,94 @@ MODEL_NAMES = {
     "LFM2-8B-A1B": "LiquidAI/LFM2-8B-A1B",
 }
 
-# Cache for loaded models
 model_cache = {}
 
 def load_model(model_key):
-    """Load and cache the selected model."""
     if model_key in model_cache:
         return model_cache[model_key]
 
     model_name = MODEL_NAMES[model_key]
     print(f"Loading {model_name}...")
     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16 if …
-        device_map=…
-    )
+        model_name,
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        device_map=None,  # Disable meta/offload shenanigans
+    ).to(device)
+
     model_cache[model_key] = (tokenizer, model)
     return tokenizer, model
 
+
 def chat_with_model(message, history, model_choice):
     tokenizer, model = load_model(model_choice)
+    device = model.device
 
-    # …
+    # Convert the Gradio message history into a string prompt
     prompt = ""
-    for …
+    for msg in history:
+        if msg["role"] == "user":
+            prompt += f"User: {msg['content']}\n"
+        elif msg["role"] == "assistant":
+            prompt += f"Assistant: {msg['content']}\n"
     prompt += f"User: {message}\nAssistant:"
 
-    # Streaming setup
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    inputs = tokenizer(prompt, return_tensors="pt").to(…
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
     generation_kwargs = dict(
         **inputs,
         streamer=streamer,
         max_new_tokens=256,
         temperature=0.7,
+        top_p=0.9,
         do_sample=True,
-        top_p=0.9
     )
 
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
 
+    partial = ""
     for new_text in streamer:
-        yield …
+        partial += new_text
+        yield partial
+
 
 def create_demo():
-    with gr.Blocks(title="LiquidAI Chat …
-        gr.Markdown("## 💧 LiquidAI …
-        …
-            value="LFM2-1.2B"
-        )
-
-        chatbot = gr.Chatbot(label="Chat with the model", height=450)
-        msg = gr.Textbox(label="Your message", placeholder="Type a message and hit Enter")
-
-        clear = gr.Button("Clear Chat")
-
-        def user_submit(user_message, chat_history, model_choice):
-            chat_history = chat_history + [(user_message, "")]
-            return "", chat_history, model_choice
-
-        msg.submit(
-            user_submit,
-            [msg, chatbot, model_choice],
-            [msg, chatbot, model_choice],
-            queue=False
-        ).then(
-            chat_with_model,
-            [msg, chatbot, model_choice],
-            chatbot
-        )
-
-        …
+    with gr.Blocks(title="LiquidAI Chat Playground") as demo:
+        gr.Markdown("## 💧 LiquidAI Chat Interface")
+
+        model_choice = gr.Dropdown(
+            label="Select Model",
+            choices=list(MODEL_NAMES.keys()),
+            value="LFM2-1.2B"
+        )
+
+        chatbot = gr.Chatbot(
+            label="Chat with LiquidAI",
+            type="messages",
+            height=450
+        )
+
+        msg = gr.Textbox(label="Your message", placeholder="Type something...")
+        clear = gr.Button("Clear")
+
+        def add_user_message(user_message, chat_history):
+            chat_history = chat_history + [{"role": "user", "content": user_message}]
+            return "", chat_history
+
+        msg.submit(add_user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
+            chat_with_model, [msg, chatbot, model_choice], chatbot
+        )
+
+        clear.click(lambda: [], None, chatbot, queue=False)
 
     return demo
 
+
 if __name__ == "__main__":
     demo = create_demo()
-    demo.queue(…
+    demo.queue()
     demo.launch(server_name="0.0.0.0", server_port=7860)
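
One caveat on the new wiring, with a hedged sketch of an alternative (not part of this commit): gr.Chatbot(type="messages") expects every update it receives to be a list of {"role": ..., "content": ...} dicts, but chat_with_model yields a plain string, and by the time the .then() step runs, add_user_message has already cleared msg and appended the user turn to the history. A minimal variant that reuses the file's imports and load_model, builds the prompt from the history alone, and streams into an appended assistant message could look like:

def chat_with_model(message, history, model_choice):
    tokenizer, model = load_model(model_choice)
    device = model.device

    # history already ends with the user turn added by add_user_message,
    # so the prompt is built from history alone; `message` arrives empty here.
    prompt = ""
    for m in history:
        role = "User" if m["role"] == "user" else "Assistant"
        prompt += f"{role}: {m['content']}\n"
    prompt += "Assistant:"

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    Thread(target=model.generate, kwargs=dict(
        **inputs, streamer=streamer, max_new_tokens=256,
        temperature=0.7, top_p=0.9, do_sample=True,
    )).start()

    # Stream the reply into a new assistant message so the Chatbot
    # always receives a messages-format list rather than a bare string.
    history = history + [{"role": "assistant", "content": ""}]
    for new_text in streamer:
        history[-1]["content"] += new_text
        yield history

With this shape, the msg.submit(...).then(...) chain and the chatbot output stay exactly as in the diff; only what the generator yields changes.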
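
The prompt above is assembled by hand with "User:" / "Assistant:" markers. If the LFM2 tokenizers ship a chat template (an assumption about the models, not something shown in this diff), tokenizer.apply_chat_template can build model inputs straight from the same messages-format history. A sketch with a hypothetical helper:

# Hypothetical helper, assuming the tokenizer provides a chat template.
def build_inputs(tokenizer, history, device):
    input_ids = tokenizer.apply_chat_template(
        history,                     # [{"role": ..., "content": ...}, ...]
        add_generation_prompt=True,  # end the prompt with the assistant turn marker
        return_tensors="pt",
    )
    return input_ids.to(device)

The returned tensor would then be passed to model.generate as input_ids in place of **inputs in generation_kwargs.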