BoostedJonP committed on
Commit
476d1eb
·
1 Parent(s): 5119833

fixed CPU loading issue

Browse files
Files changed (2) hide show
  1. app.py +15 -26
  2. requirements.txt +2 -2
app.py CHANGED
@@ -40,36 +40,25 @@ def load_model():
40
  )
41
  else:
42
  logger.info("CUDA not available, loading with CPU optimizations")
43
- try:
44
- model = AutoModelForCausalLM.from_pretrained(
45
- MODEL_NAME,
46
- trust_remote_code=True,
47
- torch_dtype=torch.float32, # Use float32 for CPU
48
- device_map="cpu", # Explicitly set to CPU
49
- attn_implementation="eager",
50
- use_cache=True,
51
- cache_dir="/tmp/model_cache",
52
- low_cpu_mem_usage=True, # Helpful for CPU environments
53
- )
54
- except Exception as cpu_error:
55
- logger.warning(f"CPU loading failed with device_map: {cpu_error}")
56
- # Fallback: try without device_map
57
- logger.info("Trying fallback CPU loading without device_map")
58
- model = AutoModelForCausalLM.from_pretrained(
59
- MODEL_NAME,
60
- trust_remote_code=True,
61
- torch_dtype=torch.float32,
62
- attn_implementation="eager",
63
- use_cache=True,
64
- cache_dir="/tmp/model_cache",
65
- low_cpu_mem_usage=True,
66
- )
67
- # Move model to CPU manually
68
- model = model.to("cpu")
69
 
70
  logger.info("Model loaded successfully!")
 
 
 
71
  except Exception as e:
72
  logger.error(f"Error loading model: {e}")
 
73
  return None, None
74
 
75
  model.generation_config.use_cache = True
 
40
  )
41
  else:
42
  logger.info("CUDA not available, loading with CPU optimizations")
43
+ model = AutoModelForCausalLM.from_pretrained(
44
+ MODEL_NAME,
45
+ trust_remote_code=True,
46
+ torch_dtype=torch.float32,
47
+ attn_implementation="eager",
48
+ use_cache=True,
49
+ cache_dir="/tmp/model_cache",
50
+ low_cpu_mem_usage=True,
51
+ )
52
+ # Move model to CPU manually
53
+ model = model.to("cpu")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  logger.info("Model loaded successfully!")
56
+ logger.info(f"Model device: {next(model.parameters()).device}")
57
+ logger.info(f"Model dtype: {next(model.parameters()).dtype}")
58
+
59
  except Exception as e:
60
  logger.error(f"Error loading model: {e}")
61
+ logger.error(f"Error type: {type(e).__name__}")
62
  return None, None
63
 
64
  model.generation_config.use_cache = True
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
- torch>=2.0.0,<2.3.0
2
  transformers==4.48.0
3
  accelerate>=0.20.0
4
  gradio>=4.0.0,<5.0.0
5
- safetensors>=0.4.0
 
1
+ torch>=2.0.0
2
  transformers==4.48.0
3
  accelerate>=0.20.0
4
  gradio>=4.0.0,<5.0.0
5
+ safetensors>=0.4.0