nova committed
Commit 410a8fc · verified · 1 Parent(s): 92693e6

Update app.py

Files changed (1)
  1. app.py +16 -16
app.py CHANGED
@@ -11,32 +11,35 @@ try:
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
-        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        dtype=torch.float16 if device == "cuda" else torch.float32,
         device_map="auto",
         trust_remote_code=True
     )
 except Exception as e:
     print(f"❌ Error loading model: {e}")
 def chat(message, history):
-    # Prepare messages list for Qwen
+    # Prepare messages list for TinyLlama
+    # TinyLlama format: <|user|>\n...\n<|assistant|>\n...
+    # But applying chat template is safer if available.
+
     messages = []
-    # System Prompt
-    messages.append({"role": "system", "content": "You are Lumin Flash, a helpful and fast AI assistant."})
+    messages.append({"role": "system", "content": "You are Lumin Flash, a helpful AI assistant."})
 
-    # History
     for user_msg, bot_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
 
-    # Current Message
     messages.append({"role": "user", "content": message})
     # Tokenize with template
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-
+    try:
+        text = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+    except:
+        # Fallback manual format if template fails
+        text = f"<|system|>\nYou are Lumin Flash.<|end|>\n<|user|>\n{message}<|end|>\n<|assistant|>\n"
     inputs = tokenizer([text], return_tensors="pt").to(device)
     # Streamer
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
@@ -63,10 +66,7 @@ demo = gr.ChatInterface(
     fn=chat,
     chatbot=gr.Chatbot(height=500),
     textbox=gr.Textbox(placeholder="Ask Lumin Flash...", container=False, scale=7),
-    title=f"Lumin Flash ({MODEL_ID})",
-    retry_btn=None,
-    undo_btn=None,
-    clear_btn="Clear",
+    title=f"Lumin Flash ({MODEL_ID})"
 )
 if __name__ == "__main__":
     demo.queue().launch(server_name="0.0.0.0", server_port=7860)
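
Both hunks cut off right after the TextIteratorStreamer is constructed, so for context, here is a minimal sketch of how such a streamer is typically consumed inside a Gradio chat function. The pattern (running model.generate on a worker thread while the main thread iterates the streamer) is standard transformers usage; the max_new_tokens value and variable names below are illustrative assumptions, not part of this commit:

    # Sketch only — assumes model, inputs, and streamer as defined in chat() above.
    from threading import Thread

    # max_new_tokens=512 is an assumed value, not taken from this commit.
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=512)

    # generate() blocks until decoding finishes, so it runs on a worker
    # thread while the main thread drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial = ""
    for new_text in streamer:  # yields decoded text chunks as tokens arrive
        partial += new_text
        yield partial          # each yield re-renders the Gradio chatbot

TextIteratorStreamer pushes decoded text onto an internal queue from the generation thread, and iterating it on the main thread blocks until the next chunk (or end of generation) arrives, which is what makes the token-by-token UI update possible.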