Update app.py
app.py CHANGED

@@ -8,6 +8,8 @@ MODEL_ID = "FractalAIResearch/Fathom-R1-14B"
 @spaces.GPU
 def chat_with_model(message, history, max_tokens, temperature):
     try:
+        print("🔥 GPU allocated, loading model...")
+
         # Load model and tokenizer
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
@@ -16,15 +18,24 @@ def chat_with_model(message, history, max_tokens, temperature):
             trust_remote_code=True
         )
 
+        # EXPLICITLY move model to GPU
+        model = model.cuda()
+
+        print(f"✅ Model loaded on device: {model.device}")
+        print(f"🔥 GPU available: {torch.cuda.is_available()}")
+        print(f"🔥 GPU device count: {torch.cuda.device_count()}")
+
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
 
         # Simple prompt format
         prompt = f"User: {message}\nAssistant:"
 
-        # Tokenize
+        # Tokenize and move to GPU
         inputs = tokenizer(prompt, return_tensors="pt")
-        inputs = {k: v.
+        inputs = {k: v.cuda() for k, v in inputs.items()}
+
+        print(f"✅ Inputs moved to: {inputs['input_ids'].device}")
 
         # Generate
         with torch.no_grad():
@@ -40,12 +51,15 @@ def chat_with_model(message, history, max_tokens, temperature):
         # Decode response
         response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
 
+        print(f"✅ Generated response: {response[:100]}...")
+
         # Update history
         history.append([message, response])
         return history, history, ""
 
     except Exception as e:
-        error_msg = f"Error: {str(e)}"
+        error_msg = f"❌ Error: {str(e)}"
+        print(error_msg)
         history.append([message, error_msg])
         return history, history, ""
 
@@ -89,7 +103,7 @@ with gr.Blocks(title="Fathom R1 14B Chatbot") as demo:
     gr.Examples(
         examples=[
             "Solve: 2x + 5 = 15",
-            "Explain quantum mechanics simply",
+            "Explain quantum mechanics simply",
             "What is the derivative of x²?",
         ],
         inputs=msg
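Assembled from the hunks above, the function after this commit looks roughly as follows. This is a reading aid, not the exact file: the `from_pretrained` keyword arguments and the `generate()` call fall outside the hunks shown, so those parts are assumptions, marked as such in the comments. The overall pattern is the standard ZeroGPU one: `@spaces.GPU` attaches a GPU only for the duration of the call, so both the model weights (`model.cuda()`) and the tokenized inputs (`v.cuda()`) must be moved onto it inside the decorated function.

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "FractalAIResearch/Fathom-R1-14B"

@spaces.GPU  # ZeroGPU: a GPU is attached only while this function runs
def chat_with_model(message, history, max_tokens, temperature):
    try:
        # Load model and tokenizer inside the call, after GPU allocation
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16,  # assumption; not visible in the hunks
            trust_remote_code=True,
        )
        model = model.cuda()  # explicitly move weights to the allocated GPU
        # (debug print statements from the commit omitted for brevity)

        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Tokenize and move inputs to the same device as the model
        prompt = f"User: {message}\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs = {k: v.cuda() for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model.generate(  # sampling arguments are assumptions
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
            )

        # Slice off the prompt tokens; keep only the new completion
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
        )
        history.append([message, response])
        return history, history, ""
    except Exception as e:
        history.append([message, f"❌ Error: {str(e)}"])
        return history, history, ""

A more portable variant of the input move is `{k: v.to(model.device) for k, v in inputs.items()}`, which also works when no GPU is attached; the commit's explicit `.cuda()` calls have the advantage that device placement failures surface immediately in the Space logs, which is what the added print statements are for.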