Spaces:

AI-is-out-there
/

LatexBot

Runtime error

App Files Community

zyzk95469 committed on Apr 28, 2025

Commit

da14e9b

verified ·

1 Parent(s): dce6ebb

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -70

app.py CHANGED Viewed

@@ -1,14 +1,11 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoProcessor, AutoModelForVision2Seq, BitsAndBytesConfig
 from peft import PeftModel
 import torch
 import os
 import gc
 from qwen_vl_utils import process_vision_info
-# 设置环境变量以限制 PyTorch 内存使用
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 # 全局变量
 model = None
 tokenizer = None
@@ -18,57 +15,39 @@ def load_model():
     global model, tokenizer, processor
     # 清理内存
-    torch.cuda.empty_cache()
     gc.collect()
-    # Define paths (for Hugging Face Spaces)
     base_model_path = "Qwen/Qwen2.5-VL-7B-Instruct"
-    lora_model_path = os.environ.get("LORA_PATH", "AI-is-out-there/Latex-OCR")  # 您的LoRA权重路径
-    # Load tokenizer and processor
     tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
     processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
-    # 配置4位量化
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True,               # 启用4位量化
-        bnb_4bit_compute_dtype=torch.float16,  # 计算数据类型
-        bnb_4bit_use_double_quant=True,  # 使用嵌套量化进一步减少内存
-        bnb_4bit_quant_type="nf4",       # 使用NormalFloat4量化类型
     )
     try:
-        # 使用4位量化加载模型
-        model = AutoModelForVision2Seq.from_pretrained(
-            base_model_path,
-            trust_remote_code=True,
-            device_map="auto",           # 自动分配到可用设备
-            quantization_config=quantization_config,
-        )
-        # 应用LoRA权重
         model = PeftModel.from_pretrained(model, lora_model_path)
-        model.eval()
-        print("模型使用4位量化成功加载!")
     except Exception as e:
-        print(f"4位量化加载失败: {e}，尝试备用方法")
-        # 回退到CPU卸载方案
-        model = AutoModelForVision2Seq.from_pretrained(
-            base_model_path,
-            trust_remote_code=True,
-            device_map="auto",
-            torch_dtype=torch.float16,
-            offload_folder="offload"
-        )
-        # 应用LoRA权重
-        model = PeftModel.from_pretrained(model, lora_model_path)
-        model.eval()
-        print("模型使用备用方案加载成功!")
     return model, tokenizer, processor
 def recognize_formula(image):
@@ -78,7 +57,6 @@ def recognize_formula(image):
     try:
         # 清理内存
-        torch.cuda.empty_cache()
         gc.collect()
         # 准备消息数据格式
@@ -108,35 +86,13 @@ def recognize_formula(image):
             padding=True,
             return_tensors="pt",
         )
-        # 将输入数据移动到适当的设备
-        for k, v in inputs.items():
-            if hasattr(v, "to"):
-                try:
-                    # 尝试获取model.device
-                    if hasattr(model, "device"):
-                        inputs[k] = v.to(model.device)
-                    else:
-                        # 尝试获取第一个设备映射
-                        if hasattr(model, "hf_device_map"):
-                            first_device = next(iter(model.hf_device_map.values()))
-                            inputs[k] = v.to(first_device)
-                        else:
-                            # 默认到CUDA或CPU
-                            inputs[k] = v.to('cuda:0' if torch.cuda.is_available() else 'cpu')
-                except Exception as e:
-                    print(f"移动输入到设备时出错: {e}")
-                    # 安全回退
-                    inputs[k] = v.to('cuda:0' if torch.cuda.is_available() else 'cpu')
-        # 生成预测
         with torch.no_grad():
             generated_ids = model.generate(
                 **inputs,
-                max_new_tokens=512,
-                do_sample=False,   # 使用贪婪解码
-                num_beams=1,       # 不使用束搜索
-                low_memory=True    # 低内存模式
             )
             # 提取生成的ID（去除输入部分）
@@ -151,7 +107,7 @@ def recognize_formula(image):
                 clean_up_tokenization_spaces=False
             )
-        # 清理输出文本，仅保留LaTeX部分
         latex_result = output_text[0].strip()
         return latex_result
@@ -174,4 +130,4 @@ if __name__ == "__main__":
     # 初始化模型
     model, tokenizer, processor = load_model()
     # 启动接口
-    iface.launch(share=False)

 import gradio as gr
+from transformers import AutoTokenizer, AutoProcessor, AutoModelForVision2Seq
 from peft import PeftModel
 import torch
 import os
 import gc
 from qwen_vl_utils import process_vision_info
 # 全局变量
 model = None
 tokenizer = None
     global model, tokenizer, processor
     # 清理内存
     gc.collect()
+    # 定义模型路径
     base_model_path = "Qwen/Qwen2.5-VL-7B-Instruct"
+    lora_model_path = os.environ.get("LORA_PATH", "AI-is-out-there/Latex-OCR")
+    print(f"开始加载模型: {base_model_path}")
+    # 加载tokenizer和processor
     tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
     processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
+    # 加载模型到CPU
+    model = AutoModelForVision2Seq.from_pretrained(
+        base_model_path,
+        trust_remote_code=True,
+        device_map="cpu",
+        torch_dtype=torch.float32,  # CPU上使用float32
     )
+    # 应用LoRA权重
     try:
+        print(f"加载LoRA权重: {lora_model_path}")
         model = PeftModel.from_pretrained(model, lora_model_path)
+        print("LoRA权重加载成功!")
     except Exception as e:
+        print(f"LoRA权重加载失败: {e}")
+        print("将使用基础模型继续...")
+    # 设置为评估模式
+    model.eval()
+    print("模型加载完成!")
     return model, tokenizer, processor
 def recognize_formula(image):
     try:
         # 清理内存
         gc.collect()
         # 准备消息数据格式
             padding=True,
             return_tensors="pt",
         )
+        # 生成预测 - 减少token数量以提高CPU速度
         with torch.no_grad():
             generated_ids = model.generate(
                 **inputs,
+                max_new_tokens=100,  # 减少生成token数量
+                do_sample=False
             )
             # 提取生成的ID（去除输入部分）
                 clean_up_tokenization_spaces=False
             )
+        # 清理输出文本
         latex_result = output_text[0].strip()
         return latex_result
     # 初始化模型
     model, tokenizer, processor = load_model()
     # 启动接口
+    iface.launch()