nova committed on
Commit
a155f45
·
verified ·
1 Parent(s): 410a8fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -2,8 +2,8 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  from threading import Thread
5
- # Model Configuration: TinyLlama-1.1B (Classic, Fast, Non-Qwen)
6
- MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
7
  # Check Device
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
  print(f"🚀 Loading {MODEL_ID} on {device}...")
@@ -18,19 +18,20 @@ try:
18
  except Exception as e:
19
  print(f"❌ Error loading model: {e}")
20
  def chat(message, history):
21
- # Prepare messages list for TinyLlama
22
- # TinyLlama format: <|user|>\n...\n<|assistant|>\n...
23
- # But applying chat template is safer if available.
24
-
25
  messages = []
26
- messages.append({"role": "system", "content": "You are Lumin Flash, a helpful AI assistant."})
 
 
 
 
27
 
28
  for user_msg, bot_msg in history:
29
  messages.append({"role": "user", "content": user_msg})
30
  messages.append({"role": "assistant", "content": bot_msg})
31
 
32
  messages.append({"role": "user", "content": message})
33
- # Tokenize with template
34
  try:
35
  text = tokenizer.apply_chat_template(
36
  messages,
@@ -38,17 +39,17 @@ def chat(message, history):
38
  add_generation_prompt=True
39
  )
40
  except:
41
- # Fallback manual format if template fails
42
- text = f"<|system|>\nYou are Lumin Flash.<|end|>\n<|user|>\n{message}<|end|>\n<|assistant|>\n"
43
  inputs = tokenizer([text], return_tensors="pt").to(device)
44
  # Streamer
45
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
46
 
47
- # Generate
48
  generation_kwargs = dict(
49
  inputs,
50
  streamer=streamer,
51
- max_new_tokens=512,
52
  temperature=0.7,
53
  do_sample=True,
54
  top_p=0.9
@@ -64,9 +65,9 @@ def chat(message, history):
64
  # Gradio Interface
65
  demo = gr.ChatInterface(
66
  fn=chat,
67
- chatbot=gr.Chatbot(height=500),
68
  textbox=gr.Textbox(placeholder="Ask Lumin Flash...", container=False, scale=7),
69
- title=f"Lumin Flash ({MODEL_ID})"
70
  )
71
  if __name__ == "__main__":
72
  demo.queue().launch(server_name="0.0.0.0", server_port=7860)
 
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  from threading import Thread
5
+ # Model Configuration: Qwen 2.5 1.5B (Much Smarter, still runs on Free CPU)
6
+ MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
7
  # Check Device
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
  print(f"🚀 Loading {MODEL_ID} on {device}...")
 
18
  except Exception as e:
19
  print(f"❌ Error loading model: {e}")
20
  def chat(message, history):
21
+ # Prepare messages list
 
 
 
22
  messages = []
23
+ # Enhanced System Prompt
24
+ messages.append({
25
+ "role": "system",
26
+ "content": "You are Lumin Flash, an advanced AI assistant created by Lumin Web. You are helpful, precise, and professional. Answer questions clearly and concisely. Do not cut off sentences."
27
+ })
28
 
29
  for user_msg, bot_msg in history:
30
  messages.append({"role": "user", "content": user_msg})
31
  messages.append({"role": "assistant", "content": bot_msg})
32
 
33
  messages.append({"role": "user", "content": message})
34
+ # Tokenize with chat template
35
  try:
36
  text = tokenizer.apply_chat_template(
37
  messages,
 
39
  add_generation_prompt=True
40
  )
41
  except:
42
+ # Fallback manual format for Qwen (ChatML style)
43
+ text = f"<|im_start|>system\nYou are Lumin Flash.<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
44
  inputs = tokenizer([text], return_tensors="pt").to(device)
45
  # Streamer
46
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
47
 
48
+ # Generate parameters (Tuned for smarts)
49
  generation_kwargs = dict(
50
  inputs,
51
  streamer=streamer,
52
+ max_new_tokens=1024, # Increased to prevent cut-offs
53
  temperature=0.7,
54
  do_sample=True,
55
  top_p=0.9
 
65
  # Gradio Interface
66
  demo = gr.ChatInterface(
67
  fn=chat,
68
+ chatbot=gr.Chatbot(height=500, type="messages"), # Updated type for newer Gradio versions
69
  textbox=gr.Textbox(placeholder="Ask Lumin Flash...", container=False, scale=7),
70
+ title=f"Lumin Flash (Smart Edition)"
71
  )
72
  if __name__ == "__main__":
73
  demo.queue().launch(server_name="0.0.0.0", server_port=7860)