MaxLSB committed on
Commit
d7b3955
·
verified ·
1 Parent(s): d58284f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -14
app.py CHANGED
@@ -4,22 +4,19 @@ import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
  from threading import Thread
6
 
7
- @spaces.GPU
8
  def predict(message, history):
9
- torch.set_default_device("cuda")
10
-
11
- # Load model and tokenizer
12
  model_id = "kurakurai/Luth-LFM2-1.2B"
13
  tokenizer = AutoTokenizer.from_pretrained(model_id)
14
  model = AutoModelForCausalLM.from_pretrained(
15
  model_id,
16
- device_map="auto",
17
- torch_dtype=torch.bfloat16,
18
  trust_remote_code=True,
19
- load_in_4bit=True, # Keeping 4-bit quantization for efficiency
20
- # attn_implementation="flash_attention_2" # Uncomment on compatible GPU
21
  )
22
-
23
  # Format conversation history for chat template
24
  messages = [{"role": "user" if i % 2 == 0 else "assistant", "content": msg}
25
  for conv in history for i, msg in enumerate(conv) if msg]
@@ -31,7 +28,7 @@ def predict(message, history):
31
  add_generation_prompt=True,
32
  return_tensors="pt",
33
  tokenize=True
34
- ).to('cuda')
35
 
36
  # Setup streamer for real-time output
37
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
@@ -67,9 +64,9 @@ gr.ChatInterface(
67
  Chat with [Luth-LFM2-1.2B](https://huggingface.co/kurakurai/Luth-LFM2-1.2B), a French-tuned version of LFM2-1.2B.
68
  """,
69
  examples=[
70
- "Peux-tu résoudre l'équation 3x - 7 = 11 pour x ?",
71
- "Explique la photosynthèse en termes simples.",
72
- "Écris un petit poème sur l'intelligence artificielle."
73
  ],
74
  theme=gr.themes.Soft(primary_hue="blue"),
75
- ).launch()
 
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
  from threading import Thread
6
 
7
+ # Remove GPU decorator since we are CPU-only
8
  def predict(message, history):
9
+ # Load model and tokenizer on CPU
 
 
10
  model_id = "kurakurai/Luth-LFM2-1.2B"
11
  tokenizer = AutoTokenizer.from_pretrained(model_id)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_id,
14
+ device_map="cpu", # CPU only
15
+ torch_dtype=torch.float16,
16
  trust_remote_code=True,
17
+ load_in_4bit=False # 4-bit quantization not supported on CPU
 
18
  )
19
+
20
  # Format conversation history for chat template
21
  messages = [{"role": "user" if i % 2 == 0 else "assistant", "content": msg}
22
  for conv in history for i, msg in enumerate(conv) if msg]
 
28
  add_generation_prompt=True,
29
  return_tensors="pt",
30
  tokenize=True
31
+ ).to('cpu') # CPU device
32
 
33
  # Setup streamer for real-time output
34
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
 
64
  Chat with [Luth-LFM2-1.2B](https://huggingface.co/kurakurai/Luth-LFM2-1.2B), a French-tuned version of LFM2-1.2B.
65
  """,
66
  examples=[
67
+ "Peux-tu résoudre l'équation 3x - 7 = 11 pour x ?",
68
+ "Explique la photosynthèse en termes simples.",
69
+ "Écris un petit poème sur l'intelligence artificielle."
70
  ],
71
  theme=gr.themes.Soft(primary_hue="blue"),
72
+ ).launch()