nova committed on
Update app.py
app.py CHANGED
@@ -11,32 +11,35 @@ try:
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
-
+        dtype=torch.float16 if device == "cuda" else torch.float32,
         device_map="auto",
         trust_remote_code=True
     )
 except Exception as e:
     print(f"❌ Error loading model: {e}")
 def chat(message, history):
-    # Prepare messages list for
+    # Prepare messages list for TinyLlama
+    # TinyLlama format: <|user|>\n...\n<|assistant|>\n...
+    # But applying chat template is safer if available.
+
     messages = []
-
-    messages.append({"role": "system", "content": "You are Lumin Flash, a helpful and fast AI assistant."})
+    messages.append({"role": "system", "content": "You are Lumin Flash, a helpful AI assistant."})
 
-    # History
     for user_msg, bot_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
 
-    # Current Message
     messages.append({"role": "user", "content": message})
     # Tokenize with template
-
-
-
-
-
-
+    try:
+        text = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+    except:
+        # Fallback manual format if template fails
+        text = f"<|system|>\nYou are Lumin Flash.<|end|>\n<|user|>\n{message}<|end|>\n<|assistant|>\n"
     inputs = tokenizer([text], return_tensors="pt").to(device)
     # Streamer
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
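The first hunk ends right where the streamer is created; the generate call that feeds it falls outside the diff context. For reference, a TextIteratorStreamer is normally drained while model.generate runs in a background thread, roughly as in the sketch below (not part of this commit; max_new_tokens=512 is an assumed value, and model, inputs, and streamer are the names from the hunk above):

    # Sketch only: the standard TextIteratorStreamer pattern, not from this commit.
    from threading import Thread

    def stream_reply(model, inputs, streamer, max_new_tokens=512):
        # Run generation in the background so this thread can drain the streamer.
        generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
        Thread(target=model.generate, kwargs=generation_kwargs).start()
        partial = ""
        for new_text in streamer:   # decoded text chunks, special tokens already stripped
            partial += new_text
            yield partial           # gr.ChatInterface renders the growing reply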
@@ -63,10 +66,7 @@ demo = gr.ChatInterface(
     fn=chat,
     chatbot=gr.Chatbot(height=500),
     textbox=gr.Textbox(placeholder="Ask Lumin Flash...", container=False, scale=7),
-    title=f"Lumin Flash ({MODEL_ID})"
-    retry_btn=None,
-    undo_btn=None,
-    clear_btn="Clear",
+    title=f"Lumin Flash ({MODEL_ID})"
 )
 if __name__ == "__main__":
     demo.queue().launch(server_name="0.0.0.0", server_port=7860)
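One note on the chat function wired in here: it unpacks history as (user_msg, bot_msg) tuples, Gradio's legacy tuple format. If the app were later switched to gr.ChatInterface(type="messages"), supported in recent Gradio releases, history would already arrive as role/content dicts and the loop from the first hunk would collapse to an extend. A sketch under that assumption, not part of this commit:

    # Sketch, assuming gr.ChatInterface(..., type="messages"):
    # history is already a list of {"role": ..., "content": ...} dicts.
    messages = [{"role": "system", "content": "You are Lumin Flash, a helpful AI assistant."}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})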
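Lastly, on the fallback branch in the first hunk: the f-string formats only the current message, so any history collected above is dropped whenever apply_chat_template raises. For message = "Hello!" it evaluates to exactly:

    <|system|>
    You are Lumin Flash.<|end|>
    <|user|>
    Hello!<|end|>
    <|assistant|>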