XD-MU committed on
Commit
b8cbffa
·
1 Parent(s): 985454b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -31,6 +31,11 @@ except ImportError as e:
31
  genai = None
32
 
33
  # --- 环境设置 ---
 
 
 
 
 
34
  warnings.filterwarnings("ignore")
35
  os.environ['PYTHONWARNINGS'] = 'ignore'
36
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -134,25 +139,24 @@ def load_llm_model():
134
  else:
135
  print(f"✅ 模型已存在: {LOCAL_MODEL_PATH}")
136
 
137
- # 🔥 关键修改:使用量化 + 内存优化
138
- print("正在使用 ms-swift PtEngine 加载模型(量化模式)...")
139
 
140
  engine = PtEngine(
141
  model_id_or_path=LOCAL_MODEL_PATH,
142
- torch_dtype=torch.float16,
143
  max_batch_size=1,
144
  device_map='cpu',
145
- # 🔥 添加量化参数
146
- quant_method='bnb', # 使用 bitsandbytes 量化
147
- quant_bits=4, # 4-bit 量化(也可以用8)
148
- bnb_4bit_compute_dtype=torch.float16,
149
  model_kwargs={
150
  'low_cpu_mem_usage': True,
151
- 'max_memory': {'cpu': '12GB'}, # 限制最大内存使用
 
152
  }
153
  )
154
 
155
- print("✅ ms-swift PtEngine 加载完成(已启用量化)")
156
 
157
  except Exception as e:
158
  print(f"❌ 模型加载失败: {e}")
 
31
  genai = None
32
 
33
  # --- 环境设置 ---
34
+ os.environ['ENABLE_AUDIO_OUTPUT'] = '0'
35
+ os.environ['VIDEO_TOTAL_PIXELS'] = '0'
36
+ os.environ['IMAGE_FACTOR'] = '1'
37
+ os.environ['MAX_PIXELS'] = '1024' # 降低到最低
38
+
39
  warnings.filterwarnings("ignore")
40
  os.environ['PYTHONWARNINGS'] = 'ignore'
41
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
139
  else:
140
  print(f"✅ 模型已存在: {LOCAL_MODEL_PATH}")
141
 
142
+
143
+ print("正在加载文本模式(禁用多模态)...")
144
 
145
  engine = PtEngine(
146
  model_id_or_path=LOCAL_MODEL_PATH,
147
+ torch_dtype=torch.bfloat16, # 使用 bfloat16 更省内存
148
  max_batch_size=1,
149
  device_map='cpu',
150
+ quant_method='bnb',
151
+ quantization_bit=4,
 
 
152
  model_kwargs={
153
  'low_cpu_mem_usage': True,
154
+ 'max_memory': {'cpu': '10GB'},
155
+ 'offload_folder': './offload', # 内存不足时卸载到磁盘
156
  }
157
  )
158
 
159
+ print("✅ 文本模式加载完成")
160
 
161
  except Exception as e:
162
  print(f"❌ 模型加载失败: {e}")