ButterM40 committed on
Commit
a94158b
·
1 Parent(s): bf6aead

Fix syntax error: add missing except clause and fix indentation

Browse files
Files changed (1) hide show
  1. backend/models/character_manager.py +30 -27
backend/models/character_manager.py CHANGED
@@ -75,34 +75,37 @@ class CharacterManager:
75
  use_fast=True,
76
  cache_dir=None
77
  )
78
-
79
- # Smart GPU/CPU loading
80
- cuda_available = torch.cuda.is_available()
81
- use_gpu = settings.DEVICE == "cuda" and cuda_available
82
-
83
- if use_gpu:
84
- gpu_name = torch.cuda.get_device_name(0)
85
- gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
86
- logger.info(f"πŸš€ Loading with GPU: {gpu_name} ({gpu_memory:.1f}GB VRAM)")
87
 
88
- self.base_model = AutoModelForCausalLM.from_pretrained(
89
- settings.BASE_MODEL,
90
- torch_dtype=torch.float16, # Use FP16 for GPU
91
- device_map="auto",
92
- trust_remote_code=True,
93
- low_cpu_mem_usage=True,
94
- use_cache=True,
95
- load_in_8bit=False, # Can enable for very large models
96
- load_in_4bit=False # Can enable for even larger models
97
- )
98
- else:
99
- logger.info("πŸ’» Loading with CPU (CUDA not available or disabled)...")
100
- self.base_model = AutoModelForCausalLM.from_pretrained(
101
- settings.BASE_MODEL,
102
- torch_dtype=torch.float32,
103
- trust_remote_code=True,
104
- use_cache=True
105
- )
 
 
 
 
 
 
 
 
 
 
 
 
106
  except Exception as e:
107
  logger.error(f"Failed to load base model {settings.BASE_MODEL}: {e}")
108
  logger.info("Trying alternative Qwen models...")
 
75
  use_fast=True,
76
  cache_dir=None
77
  )
 
 
 
 
 
 
 
 
 
78
 
79
+ # Smart GPU/CPU loading
80
+ cuda_available = torch.cuda.is_available()
81
+ use_gpu = settings.DEVICE == "cuda" and cuda_available
82
+
83
+ if use_gpu:
84
+ gpu_name = torch.cuda.get_device_name(0)
85
+ gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
86
+ logger.info(f"πŸš€ Loading with GPU: {gpu_name} ({gpu_memory:.1f}GB VRAM)")
87
+
88
+ self.base_model = AutoModelForCausalLM.from_pretrained(
89
+ settings.BASE_MODEL,
90
+ torch_dtype=torch.float16, # Use FP16 for GPU
91
+ device_map="auto",
92
+ trust_remote_code=True,
93
+ low_cpu_mem_usage=True,
94
+ use_cache=True,
95
+ load_in_8bit=False, # Can enable for very large models
96
+ load_in_4bit=False # Can enable for even larger models
97
+ )
98
+ else:
99
+ logger.info("πŸ’» Loading with CPU (CUDA not available or disabled)...")
100
+ self.base_model = AutoModelForCausalLM.from_pretrained(
101
+ settings.BASE_MODEL,
102
+ torch_dtype=torch.float32,
103
+ trust_remote_code=True,
104
+ use_cache=True
105
+ )
106
+ except Exception as inner_e:
107
+ logger.error(f"Failed to load fallback model in inner try: {inner_e}")
108
+ raise inner_e
109
  except Exception as e:
110
  logger.error(f"Failed to load base model {settings.BASE_MODEL}: {e}")
111
  logger.info("Trying alternative Qwen models...")