FractalAIR committed
Commit f50673c · verified · 1 parent: 18c4432

Update app.py

Files changed (1)
  1. app.py +20 -35
app.py CHANGED
@@ -5,41 +5,31 @@ import torch
 
 MODEL_ID = "FractalAIResearch/Fathom-R1-14B"
 
+# Load model and tokenizer OUTSIDE the GPU function (following official docs)
+print("Loading model and tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True
+)
+
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+
+# Move to GPU (following official docs pattern)
+model.to('cuda')
+print("Model loaded and moved to GPU")
+
 @spaces.GPU
-def chat_with_model(message, history, max_tokens, temperature):
+def generate_response(message, history, max_tokens, temperature):
     try:
-        print("Loading model...")
-
-        # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-
-        # Load model WITHOUT device_map to avoid CPU placement
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            torch_dtype=torch.bfloat16,
-            trust_remote_code=True,
-            # Remove device_map="auto" - it's causing CPU placement
-        )
-
-        # AFTER loading, move to GPU explicitly
-        if torch.cuda.is_available():
-            model = model.to('cuda')
-            print(f"✅ Model moved to GPU: {next(model.parameters()).device}")
-        else:
-            print("❌ No GPU available")
-
-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-
         # Simple prompt format
         prompt = f"User: {message}\nAssistant:"
 
-        # Tokenize and move inputs to same device as model
+        # Tokenize
         inputs = tokenizer(prompt, return_tensors="pt")
-        device = next(model.parameters()).device
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-
-        print(f"✅ Inputs on device: {inputs['input_ids'].device}")
+        inputs = {k: v.to('cuda') for k, v in inputs.items()}
 
         # Generate
         with torch.no_grad():
@@ -55,17 +45,12 @@ def chat_with_model(message, history, max_tokens, temperature):
         # Decode response
         response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
 
-        print(f"✅ Generated: {response[:50]}...")
-
         # Update history
         history.append([message, response])
         return history, history, ""
 
     except Exception as e:
         error_msg = f"Error: {str(e)}"
-        print(f"Full error: {e}")
-        import traceback
-        traceback.print_exc()
         history.append([message, error_msg])
         return history, history, ""
 
@@ -125,7 +110,7 @@ with gr.Blocks(title="Fathom R1 14B Chatbot") as demo:
     def bot_respond(hist, max_tok, temp):
         if hist and hist[-1][1] is None:
             message = hist[-1][0]
-            _, updated_hist, _ = chat_with_model(message, hist[:-1], max_tok, temp)
+            _, updated_hist, _ = generate_response(message, hist[:-1], max_tok, temp)
             return updated_hist, updated_hist
         return hist, hist
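
The substance of this change is the standard ZeroGPU Spaces pattern: do the expensive from_pretrained work once at module import, and keep only per-request inference inside the @spaces.GPU function so a GPU is attached just for generation rather than reloading the 14B model on every chat turn. A minimal, self-contained sketch of that pattern follows; the model ID and generation arguments are placeholders for illustration, not this Space's exact values.

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "some-org/some-model"  # placeholder, not this Space's model

# Heavy work happens once, at import time, outside any GPU-decorated function.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
model.to('cuda')  # on ZeroGPU the spaces runtime handles this at startup

@spaces.GPU
def generate(prompt, max_new_tokens=256):
    # Only this function runs with a GPU actually attached.
    inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Strip the prompt tokens and return only the newly generated text.
    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)

Calling model.to('cuda') at import time works on ZeroGPU because the spaces package intercepts CUDA initialization; on ordinary hardware the same line would require a GPU visible at startup.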