FractalAIR committed
Commit c34c8d5 · verified · 1 Parent(s): a23ac8b

Update app.py

Files changed (1)
  1. app.py +18 -4
app.py CHANGED
@@ -8,6 +8,8 @@ MODEL_ID = "FractalAIResearch/Fathom-R1-14B"
 @spaces.GPU
 def chat_with_model(message, history, max_tokens, temperature):
     try:
+        print("🔥 GPU allocated, loading model...")
+
         # Load model and tokenizer
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
@@ -16,15 +18,24 @@ def chat_with_model(message, history, max_tokens, temperature):
             trust_remote_code=True
         )
 
+        # EXPLICITLY move model to GPU
+        model = model.cuda()
+
+        print(f"✅ Model loaded on device: {model.device}")
+        print(f"🔥 GPU available: {torch.cuda.is_available()}")
+        print(f"🔥 GPU device count: {torch.cuda.device_count()}")
+
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
 
         # Simple prompt format
         prompt = f"User: {message}\nAssistant:"
 
-        # Tokenize
+        # Tokenize and move to GPU
         inputs = tokenizer(prompt, return_tensors="pt")
-        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        inputs = {k: v.cuda() for k, v in inputs.items()}
+
+        print(f"✅ Inputs moved to: {inputs['input_ids'].device}")
 
         # Generate
         with torch.no_grad():
@@ -40,12 +51,15 @@ def chat_with_model(message, history, max_tokens, temperature):
         # Decode response
         response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
 
+        print(f"✅ Generated response: {response[:100]}...")
+
         # Update history
         history.append([message, response])
         return history, history, ""
 
     except Exception as e:
-        error_msg = f"Error: {str(e)}"
+        error_msg = f"Error: {str(e)}"
+        print(error_msg)
         history.append([message, error_msg])
         return history, history, ""
 
@@ -89,7 +103,7 @@ with gr.Blocks(title="Fathom R1 14B Chatbot") as demo:
     gr.Examples(
         examples=[
             "Solve: 2x + 5 = 15",
-            "Explain quantum mechanics simply",
+            "Explain quantum mechanics simply",
             "What is the derivative of x²?",
         ],
         inputs=msg
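
A note on the device handling this commit switches to: moving the model and every input tensor with explicit .cuda() calls works on a single-GPU Space, but from_pretrained can also place the weights at load time. Below is a minimal sketch of that alternative, which is not what this commit does. It assumes a CUDA device is available, that half-precision weights are acceptable for the 14B model, and that the accelerate package is installed for device_map="auto":

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "FractalAIResearch/Fathom-R1-14B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Alternative to the commit's model.cuda(): load in fp16 and let accelerate
# place the weights, instead of materializing fp32 weights first and moving
# them afterwards (a 14B model in fp32 is roughly 56 GB of weights).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # halves memory vs. the fp32 default
    device_map="auto",          # requires `accelerate`; places weights on GPU at load
    trust_remote_code=True,
)

# Inputs still have to follow the weights:
inputs = tokenizer("User: hi\nAssistant:", return_tensors="pt").to(model.device)

With this pattern the explicit model.cuda() and per-tensor v.cuda() moves become unnecessary. Whether loading can happen once at startup or must happen inside the @spaces.GPU-decorated handler depends on the Space's GPU setup; this commit keeps the load inside the handler.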
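The hunks only show the gr.Examples(...) fragment of the UI, so for readers following along, here is a minimal sketch of how a handler with this signature and its three return values (updated history for the chatbot, the same history for state, and an empty string to clear the textbox) is typically wired into the gr.Blocks layout. Apart from msg, the examples list, and the Blocks title, every component name and slider range below is an assumption:

import gradio as gr

# chat_with_model is the handler patched above: it returns
# (history, history, "") -> chatbot display, state, cleared textbox.

with gr.Blocks(title="Fathom R1 14B Chatbot") as demo:
    chatbot = gr.Chatbot()               # renders [user, assistant] pairs
    state = gr.State([])                 # hypothetical: backing store for `history`
    msg = gr.Textbox(label="Message")
    max_tokens = gr.Slider(16, 1024, value=512, label="Max new tokens")  # assumed range
    temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")    # assumed range

    msg.submit(
        chat_with_model,
        inputs=[msg, state, max_tokens, temperature],
        outputs=[chatbot, state, msg],   # three outputs match the three return values
    )

    gr.Examples(
        examples=[
            "Solve: 2x + 5 = 15",
            "Explain quantum mechanics simply",
            "What is the derivative of x²?",
        ],
        inputs=msg,
    )

demo.launch()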