zyzk95469 committed on
Commit
da14e9b
·
verified ·
1 Parent(s): dce6ebb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -70
app.py CHANGED
@@ -1,14 +1,11 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoProcessor, AutoModelForVision2Seq, BitsAndBytesConfig
3
  from peft import PeftModel
4
  import torch
5
  import os
6
  import gc
7
  from qwen_vl_utils import process_vision_info
8
 
9
- # 设置环境变量以限制 PyTorch 内存使用
10
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
11
-
12
  # 全局变量
13
  model = None
14
  tokenizer = None
@@ -18,57 +15,39 @@ def load_model():
18
  global model, tokenizer, processor
19
 
20
  # 清理内存
21
- torch.cuda.empty_cache()
22
  gc.collect()
23
 
24
- # Define paths (for Hugging Face Spaces)
25
  base_model_path = "Qwen/Qwen2.5-VL-7B-Instruct"
26
- lora_model_path = os.environ.get("LORA_PATH", "AI-is-out-there/Latex-OCR") # 您的LoRA权重路径
 
 
27
 
28
- # Load tokenizer and processor
29
  tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
30
  processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
31
 
32
- # 配置4位量化
33
- quantization_config = BitsAndBytesConfig(
34
- load_in_4bit=True, # 启用4位量化
35
- bnb_4bit_compute_dtype=torch.float16, # 计算数据类型
36
- bnb_4bit_use_double_quant=True, # 使用嵌套量化进一步减少内存
37
- bnb_4bit_quant_type="nf4", # 使用NormalFloat4量化类型
38
  )
39
 
 
40
  try:
41
- # 使用4位量化加载模型
42
- model = AutoModelForVision2Seq.from_pretrained(
43
- base_model_path,
44
- trust_remote_code=True,
45
- device_map="auto", # 自动分配到可用设备
46
- quantization_config=quantization_config,
47
- )
48
-
49
- # 应用LoRA权重
50
  model = PeftModel.from_pretrained(model, lora_model_path)
51
- model.eval()
52
-
53
- print("模型使用4位量化成功加载!")
54
  except Exception as e:
55
- print(f"4位量化加载失败: {e},尝试备用方法")
56
-
57
- # 回退到CPU卸载方案
58
- model = AutoModelForVision2Seq.from_pretrained(
59
- base_model_path,
60
- trust_remote_code=True,
61
- device_map="auto",
62
- torch_dtype=torch.float16,
63
- offload_folder="offload"
64
- )
65
-
66
- # 应用LoRA权重
67
- model = PeftModel.from_pretrained(model, lora_model_path)
68
- model.eval()
69
-
70
- print("模型使用备用方案加载成功!")
71
 
 
72
  return model, tokenizer, processor
73
 
74
  def recognize_formula(image):
@@ -78,7 +57,6 @@ def recognize_formula(image):
78
 
79
  try:
80
  # 清理内存
81
- torch.cuda.empty_cache()
82
  gc.collect()
83
 
84
  # 准备消息数据格式
@@ -108,35 +86,13 @@ def recognize_formula(image):
108
  padding=True,
109
  return_tensors="pt",
110
  )
111
-
112
- # 将输入数据移动到适当的设备
113
- for k, v in inputs.items():
114
- if hasattr(v, "to"):
115
- try:
116
- # 尝试获取model.device
117
- if hasattr(model, "device"):
118
- inputs[k] = v.to(model.device)
119
- else:
120
- # 尝试获取第一个设备映射
121
- if hasattr(model, "hf_device_map"):
122
- first_device = next(iter(model.hf_device_map.values()))
123
- inputs[k] = v.to(first_device)
124
- else:
125
- # 默认到CUDA或CPU
126
- inputs[k] = v.to('cuda:0' if torch.cuda.is_available() else 'cpu')
127
- except Exception as e:
128
- print(f"移动输入到设备时出错: {e}")
129
- # 安全回退
130
- inputs[k] = v.to('cuda:0' if torch.cuda.is_available() else 'cpu')
131
 
132
- # 生成预测
133
  with torch.no_grad():
134
  generated_ids = model.generate(
135
  **inputs,
136
- max_new_tokens=512,
137
- do_sample=False, # 使用贪婪解码
138
- num_beams=1, # 不使用束搜索
139
- low_memory=True # 低内存模式
140
  )
141
 
142
  # 提取生成的ID(去除输入部分)
@@ -151,7 +107,7 @@ def recognize_formula(image):
151
  clean_up_tokenization_spaces=False
152
  )
153
 
154
- # 清理输出文本,仅保留LaTeX部分
155
  latex_result = output_text[0].strip()
156
 
157
  return latex_result
@@ -174,4 +130,4 @@ if __name__ == "__main__":
174
  # 初始化模型
175
  model, tokenizer, processor = load_model()
176
  # 启动接口
177
- iface.launch(share=False)
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoProcessor, AutoModelForVision2Seq
3
  from peft import PeftModel
4
  import torch
5
  import os
6
  import gc
7
  from qwen_vl_utils import process_vision_info
8
 
 
 
 
9
  # 全局变量
10
  model = None
11
  tokenizer = None
 
15
  global model, tokenizer, processor
16
 
17
  # 清理内存
 
18
  gc.collect()
19
 
20
+ # 定义模型路径
21
  base_model_path = "Qwen/Qwen2.5-VL-7B-Instruct"
22
+ lora_model_path = os.environ.get("LORA_PATH", "AI-is-out-there/Latex-OCR")
23
+
24
+ print(f"开始加载模型: {base_model_path}")
25
 
26
+ # 加载tokenizer和processor
27
  tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
28
  processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
29
 
30
+ # 加载模型到CPU
31
+ model = AutoModelForVision2Seq.from_pretrained(
32
+ base_model_path,
33
+ trust_remote_code=True,
34
+ device_map="cpu",
35
+ torch_dtype=torch.float32, # CPU上使用float32
36
  )
37
 
38
+ # 应用LoRA权重
39
  try:
40
+ print(f"加载LoRA权重: {lora_model_path}")
 
 
 
 
 
 
 
 
41
  model = PeftModel.from_pretrained(model, lora_model_path)
42
+ print("LoRA权重加载成功!")
 
 
43
  except Exception as e:
44
+ print(f"LoRA权重加载失败: {e}")
45
+ print("将使用基础模型继续...")
46
+
47
+ # 设置为评估模式
48
+ model.eval()
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ print("模型加载完成!")
51
  return model, tokenizer, processor
52
 
53
  def recognize_formula(image):
 
57
 
58
  try:
59
  # 清理内存
 
60
  gc.collect()
61
 
62
  # 准备消息数据格式
 
86
  padding=True,
87
  return_tensors="pt",
88
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
+ # 生成预测 - 减少token数量以提高CPU速度
91
  with torch.no_grad():
92
  generated_ids = model.generate(
93
  **inputs,
94
+ max_new_tokens=100, # 减少生成token数量
95
+ do_sample=False
 
 
96
  )
97
 
98
  # 提取生成的ID(去除输入部分)
 
107
  clean_up_tokenization_spaces=False
108
  )
109
 
110
+ # 清理输出文本
111
  latex_result = output_text[0].strip()
112
 
113
  return latex_result
 
130
  # 初始化模型
131
  model, tokenizer, processor = load_model()
132
  # 启动接口
133
+ iface.launch()