winbeau commited on
Commit
a4cc0a3
·
1 Parent(s): b59a5c4

feat: cuda

Browse files
Files changed (1) hide show
  1. app.py +100 -53
app.py CHANGED
@@ -1,91 +1,142 @@
1
  """
2
  MinerU PDF 解析器 - HuggingFace Spaces ZeroGPU 版本
3
- 使用 monkey-patch 解决 daemonic processes 问题
4
  """
5
 
6
  # ============================================
7
- # 关键:在导入任何其他模块之前进行 monkey-patch
8
  # ============================================
9
  import os
10
  import sys
11
 
12
- # 禁用多进程相关环境变量
13
  os.environ['MINERU_WORKER_NUM'] = '0'
14
  os.environ['OMP_NUM_THREADS'] = '1'
15
  os.environ['MKL_NUM_THREADS'] = '1'
16
  os.environ['TOKENIZERS_PARALLELISM'] = 'false'
17
- os.environ['ONNXRUNTIME_LOG_SEVERITY_LEVEL'] = '3' # 隐藏 ONNX Runtime 警告
18
- os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 帮助 MIG 兼容性
19
- os.environ['TORCH_USE_CUDA_DSA'] = '1' # 设备端断言
20
 
21
- # Monkey-patch: 将 ProcessPoolExecutor 替换为 ThreadPoolExecutor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  import concurrent.futures
23
  from concurrent.futures import ThreadPoolExecutor
24
 
25
- # 保存原始的 ProcessPoolExecutor
26
- _OriginalProcessPoolExecutor = concurrent.futures.ProcessPoolExecutor
27
-
28
- # 创建一个假的 ProcessPoolExecutor,实际使用 ThreadPoolExecutor
29
  class FakeProcessPoolExecutor(ThreadPoolExecutor):
30
- """用 ThreadPoolExecutor 替代 ProcessPoolExecutor,避免 daemon 进程问题"""
31
  def __init__(self, max_workers=None, mp_context=None, initializer=None, initargs=()):
32
- # 忽略 mp_context 参数,因为 ThreadPoolExecutor 不需要
33
  super().__init__(max_workers=max_workers, initializer=initializer, initargs=initargs)
34
 
35
- # 替换
36
  concurrent.futures.ProcessPoolExecutor = FakeProcessPoolExecutor
37
 
38
- # 同时替换 multiprocessing.Pool
39
  import multiprocessing
40
  import multiprocessing.pool
41
 
42
  class FakePool:
43
- """用线程模拟 multiprocessing.Pool"""
44
  def __init__(self, processes=None, initializer=None, initargs=(), maxtasksperchild=None, context=None):
45
  self._executor = ThreadPoolExecutor(max_workers=processes)
46
-
47
  def map(self, func, iterable, chunksize=None):
48
  return list(self._executor.map(func, iterable))
49
-
50
  def starmap(self, func, iterable, chunksize=None):
51
- def wrapper(args):
52
- return func(*args)
53
- return list(self._executor.map(wrapper, iterable))
54
-
55
  def apply(self, func, args=(), kwds={}):
56
- future = self._executor.submit(func, *args, **kwds)
57
- return future.result()
58
-
59
  def apply_async(self, func, args=(), kwds={}, callback=None, error_callback=None):
60
  future = self._executor.submit(func, *args, **kwds)
61
  if callback:
62
  future.add_done_callback(lambda f: callback(f.result()))
63
  return future
64
-
65
  def close(self):
66
  self._executor.shutdown(wait=False)
67
-
68
  def terminate(self):
69
  self._executor.shutdown(wait=False, cancel_futures=True)
70
-
71
  def join(self):
72
  self._executor.shutdown(wait=True)
73
-
74
  def __enter__(self):
75
  return self
76
-
77
  def __exit__(self, exc_type, exc_val, exc_tb):
78
  self.terminate()
79
  return False
80
 
81
- # 替换 multiprocessing.Pool
82
  multiprocessing.Pool = FakePool
83
  multiprocessing.pool.Pool = FakePool
84
 
85
- print("✅ Monkey-patch applied: ProcessPoolExecutor → ThreadPoolExecutor")
86
 
87
  # ============================================
88
- # 现在可以安全导入其他模块
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # ============================================
90
  import spaces
91
  import gradio as gr
@@ -97,9 +148,9 @@ from pathlib import Path
97
  @spaces.GPU(duration=300)
98
  def parse_document(
99
  file,
100
- backend: str = "vlm-auto-engine", # VLM 模式更兼容 MIG GPU
101
  lang: str = "ch",
102
- max_pages: int = 20,
103
  table_enable: bool = True,
104
  formula_enable: bool = True,
105
  ):
@@ -110,6 +161,11 @@ def parse_document(
110
  gpu_name = torch.cuda.get_device_name(0)
111
  gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1024**3
112
  print(f"✅ GPU: {gpu_name} ({gpu_mem:.1f} GB)")
 
 
 
 
 
113
  else:
114
  print("❌ No GPU available!")
115
  return "错误:GPU 不可用", "", 0
@@ -173,12 +229,10 @@ def parse_document(
173
  print(status)
174
  return status, markdown, elapsed
175
  else:
176
- # 查找可能的输出文件
177
  for root, dirs, files in os.walk(output_dir):
178
  for f in files:
179
  if f.endswith('.md'):
180
- md_file = os.path.join(root, f)
181
- with open(md_file, "r", encoding="utf-8") as file:
182
  markdown = file.read()
183
  return f"✅ 解析成功!耗时 {elapsed:.1f} 秒", markdown, elapsed
184
  return f"❌ 解析失败:未找到输出文件", "", elapsed
@@ -193,12 +247,12 @@ def parse_document(
193
 
194
 
195
  # Gradio 界面
196
- with gr.Blocks(title="MinerU PDF 解析器 (ZeroGPU H200)", theme=gr.themes.Soft()) as demo:
197
  gr.Markdown("""
198
  # 📄 MinerU PDF 解析器
199
- ### 🚀 Powered by HuggingFace ZeroGPU (NVIDIA H200 70GB)
200
 
201
- 将 PDF/图片转换为 Markdown 格式,支持表格、公式识别。
202
  """)
203
 
204
  with gr.Row():
@@ -223,15 +277,12 @@ with gr.Blocks(title="MinerU PDF 解析器 (ZeroGPU H200)", theme=gr.themes.Soft
223
  ("中文", "ch"),
224
  ("英文", "en"),
225
  ("自动检测", "auto"),
226
- ("日文", "japan"),
227
- ("韩文", "korean"),
228
- ("拉丁语系", "latin"),
229
  ],
230
  value="ch",
231
  label="文档语言",
232
  )
233
 
234
- max_pages = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="最大页数")
235
 
236
  with gr.Row():
237
  table_enable = gr.Checkbox(value=True, label="表格识别")
@@ -252,14 +303,10 @@ with gr.Blocks(title="MinerU PDF 解析器 (ZeroGPU H200)", theme=gr.themes.Soft
252
 
253
  gr.Markdown("""
254
  ---
255
- ### 📝 说明
256
- - **VLM 模式**: 推荐,兼容 ZeroGPU MIG 分区
257
- - **混合模式**: 综合精度和速度
258
- - **Pipeline 模式**: 可能在 MIG GPU 上有兼容性问题
259
-
260
- ### ⚠️ 注意
261
- - ZeroGPU 有使用配额限制
262
- - 建议先用小文档测试
263
  """)
264
 
265
  if __name__ == "__main__":
 
1
  """
2
  MinerU PDF 解析器 - HuggingFace Spaces ZeroGPU 版本
3
+ 修复 H200 MIG (slice) CUBLAS 兼容性问题
4
  """
5
 
6
  # ============================================
7
+ # 关键:在导入任何其他模块之前设置环境变量
8
  # ============================================
9
  import os
10
  import sys
11
 
12
+ # 禁用多进程
13
  os.environ['MINERU_WORKER_NUM'] = '0'
14
  os.environ['OMP_NUM_THREADS'] = '1'
15
  os.environ['MKL_NUM_THREADS'] = '1'
16
  os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 
 
 
17
 
18
+ # 隐藏警告
19
+ os.environ['ONNXRUNTIME_LOG_SEVERITY_LEVEL'] = '3'
20
+
21
+ # 禁用 Flash Attention,强制 eager 模式
22
+ os.environ['ATTN_BACKEND'] = 'eager'
23
+ os.environ['TRANSFORMERS_ATTN_IMPLEMENTATION'] = 'eager'
24
+
25
+ # CUDA 设置
26
+ os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
27
+ os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
28
+ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
29
+
30
+ # ============================================
31
+ # Monkey-patch ProcessPoolExecutor
32
+ # ============================================
33
  import concurrent.futures
34
  from concurrent.futures import ThreadPoolExecutor
35
 
 
 
 
 
class FakeProcessPoolExecutor(ThreadPoolExecutor):
    """Thread-backed stand-in for ``concurrent.futures.ProcessPoolExecutor``.

    Installed in place of the real class so that libraries asking for a
    process pool get a thread pool instead.

    Args:
        max_workers: Forwarded to ``ThreadPoolExecutor``.
        mp_context: Accepted for signature compatibility and ignored; a
            thread pool has no multiprocessing context.
        initializer: Callable run at the start of each worker thread.
        initargs: Positional arguments for ``initializer``.
        max_tasks_per_child: Accepted for signature compatibility with the
            Python 3.11+ ``ProcessPoolExecutor`` and ignored; threads are
            never recycled.
    """

    def __init__(self, max_workers=None, mp_context=None, initializer=None,
                 initargs=(), *, max_tasks_per_child=None):
        # Only the arguments that make sense for threads are passed on.
        super().__init__(max_workers=max_workers, initializer=initializer, initargs=initargs)
39
 
 
40
  concurrent.futures.ProcessPoolExecutor = FakeProcessPoolExecutor
41
 
 
42
  import multiprocessing
43
  import multiprocessing.pool
44
 
class _FakeAsyncResult:
    """Future wrapper exposing the ``multiprocessing.pool.AsyncResult`` API.

    Attributes that are not defined here are forwarded to the wrapped
    future, so code that treated the return value of ``apply_async`` as a
    plain ``Future`` (``result()``, ``done()``, ``add_done_callback()``)
    keeps working.
    """

    def __init__(self, future):
        self._future = future

    def get(self, timeout=None):
        """Return the task result, re-raising the task's exception if any.

        NOTE: on timeout this raises the ``concurrent.futures`` timeout
        error, not ``multiprocessing.TimeoutError``.
        """
        return self._future.result(timeout)

    def wait(self, timeout=None):
        """Block until the task finishes or ``timeout`` seconds elapse."""
        concurrent.futures.wait([self._future], timeout=timeout)

    def ready(self):
        """Return True once the task has finished (or was cancelled)."""
        return self._future.done()

    def successful(self):
        """Return True if the task finished without raising.

        Raises:
            ValueError: if the task has not finished yet.
        """
        if not self._future.done():
            raise ValueError("task is not ready")
        return not self._future.cancelled() and self._future.exception() is None

    def __getattr__(self, name):
        # Guard against infinite recursion when _future itself is missing.
        if name == "_future":
            raise AttributeError(name)
        return getattr(self._future, name)


class FakePool:
    """Thread-backed stand-in for ``multiprocessing.Pool``.

    Args:
        processes: Number of worker threads (``None`` lets
            ``ThreadPoolExecutor`` pick its default).
        initializer: Callable run at the start of each worker thread.
        initargs: Positional arguments for ``initializer``.
        maxtasksperchild: Accepted for signature compatibility and ignored.
        context: Accepted for signature compatibility and ignored.
    """

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None, context=None):
        # The initializer is forwarded so worker setup is not silently lost.
        self._executor = ThreadPoolExecutor(
            max_workers=processes, initializer=initializer, initargs=initargs
        )

    def map(self, func, iterable, chunksize=None):
        """Apply ``func`` to every item and return the results as a list."""
        return list(self._executor.map(func, iterable))

    def starmap(self, func, iterable, chunksize=None):
        """Like ``map`` but each item is unpacked into positional arguments."""
        return list(self._executor.map(lambda args: func(*args), iterable))

    def apply(self, func, args=(), kwds=None):
        """Run ``func(*args, **kwds)`` on a worker and return its result."""
        return self._executor.submit(func, *args, **(kwds or {})).result()

    def apply_async(self, func, args=(), kwds=None, callback=None, error_callback=None):
        """Schedule ``func(*args, **kwds)`` and return a result handle.

        ``callback`` receives the result on success; ``error_callback``
        receives the exception instance on failure. Neither is called for a
        cancelled task.
        """
        future = self._executor.submit(func, *args, **(kwds or {}))
        if callback is not None or error_callback is not None:
            def _notify(done):
                if done.cancelled():
                    return
                error = done.exception()
                if error is not None:
                    # Route failures to error_callback instead of raising
                    # inside the done-callback.
                    if error_callback is not None:
                        error_callback(error)
                elif callback is not None:
                    callback(done.result())

            future.add_done_callback(_notify)
        return _FakeAsyncResult(future)

    def close(self):
        """Stop accepting new tasks without waiting for running ones."""
        self._executor.shutdown(wait=False)

    def terminate(self):
        """Stop accepting tasks and cancel those that have not started."""
        self._executor.shutdown(wait=False, cancel_futures=True)

    def join(self):
        """Wait for the submitted tasks to finish."""
        self._executor.shutdown(wait=True)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Same as multiprocessing.Pool: leaving the context terminates the pool.
        self.terminate()
        return False
70
 
 
71
  multiprocessing.Pool = FakePool
72
  multiprocessing.pool.Pool = FakePool
73
 
74
+ print("✅ Monkey-patch: ProcessPoolExecutor → ThreadPoolExecutor")
75
 
76
  # ============================================
77
+ # Patch Tensor.__matmul__ (@ 运算符) 使用 CPU fallback
78
+ # ============================================
79
+ import torch
80
+
81
+ # 禁用所有 SDPA 优化,强制使用 math 实现
82
+ if hasattr(torch.backends.cuda, 'enable_flash_sdp'):
83
+ torch.backends.cuda.enable_flash_sdp(False)
84
+ if hasattr(torch.backends.cuda, 'enable_mem_efficient_sdp'):
85
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
86
+ if hasattr(torch.backends.cuda, 'enable_math_sdp'):
87
+ torch.backends.cuda.enable_math_sdp(True)
88
+
89
+ print("✅ Disabled Flash/MemEfficient SDPA, using math SDPA only")
90
+
91
+ # 保存原始方法
92
+ _original_tensor_matmul = torch.Tensor.__matmul__
93
+ _original_matmul = torch.matmul
94
+ _original_bmm = torch.bmm
95
+ _cublas_error_count = 0
96
+
def _safe_matmul_impl(a, b, original_fn):
    """Run ``original_fn(a, b)`` and retry on CPU after a CUBLAS failure.

    Args:
        a: Left operand tensor; its device and dtype are used for the result
            of the fallback path.
        b: Right operand tensor.
        original_fn: The unpatched matmul-like callable to invoke.

    Returns:
        The result of ``original_fn``. After a fallback, the CPU result is
        moved back to ``a``'s device and cast to ``a``'s dtype.

    Raises:
        RuntimeError: re-raised unchanged when the message does not mention
            CUBLAS.
    """
    global _cublas_error_count
    try:
        return original_fn(a, b)
    except RuntimeError as e:
        if 'CUBLAS' not in str(e):
            raise
        _cublas_error_count += 1
        # Only the first few failures are reported to keep the log readable.
        if _cublas_error_count <= 5:
            print(f"⚠️ CUBLAS error #{_cublas_error_count}, falling back to CPU")

        def _to_cpu(t):
            # Upcast only reduced-precision floats. Forcing everything to
            # float32 would silently lose float64 precision and drop the
            # imaginary part of complex tensors.
            t = t.cpu()
            if t.dtype in (torch.float16, torch.bfloat16):
                return t.float()
            return t

        result = original_fn(_to_cpu(a), _to_cpu(b))
        return result.to(device=a.device, dtype=a.dtype)
113
+
def safe_tensor_matmul(self, other):
    """Replacement for ``Tensor.__matmul__`` (the ``@`` operator).

    Delegates to ``_safe_matmul_impl`` with the saved original method.
    """
    return _safe_matmul_impl(a=self, b=other, original_fn=_original_tensor_matmul)
117
+
def safe_matmul(input, other, *, out=None):
    """Replacement for ``torch.matmul``.

    Without ``out`` the call goes through ``_safe_matmul_impl``. With
    ``out`` the saved original is called directly, with no fallback.
    """
    if out is None:
        return _safe_matmul_impl(input, other, _original_matmul)
    # An explicit output tensor bypasses the fallback wrapper.
    return _original_matmul(input, other, out=out)
124
+
def safe_bmm(input, mat2, *, out=None):
    """Replacement for ``torch.bmm``.

    Without ``out`` the call goes through ``_safe_matmul_impl``. With
    ``out`` the saved original is called directly, with no fallback.
    """
    if out is None:
        return _safe_matmul_impl(input, mat2, _original_bmm)
    # An explicit output tensor bypasses the fallback wrapper.
    return _original_bmm(input, mat2, out=out)
130
+
131
+ # 应用 patches
132
+ torch.Tensor.__matmul__ = safe_tensor_matmul
133
+ torch.matmul = safe_matmul
134
+ torch.bmm = safe_bmm
135
+
136
+ print("✅ Monkey-patch: Tensor.__matmul__/matmul/bmm with CPU fallback")
137
+
138
+ # ============================================
139
+ # 导入其他模块
140
  # ============================================
141
  import spaces
142
  import gradio as gr
 
148
  @spaces.GPU(duration=300)
149
  def parse_document(
150
  file,
151
+ backend: str = "vlm-auto-engine",
152
  lang: str = "ch",
153
+ max_pages: int = 5,
154
  table_enable: bool = True,
155
  formula_enable: bool = True,
156
  ):
 
161
  gpu_name = torch.cuda.get_device_name(0)
162
  gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1024**3
163
  print(f"✅ GPU: {gpu_name} ({gpu_mem:.1f} GB)")
164
+
165
+ # 再次确保 SDPA 设置正确
166
+ if hasattr(torch.backends.cuda, 'enable_flash_sdp'):
167
+ torch.backends.cuda.enable_flash_sdp(False)
168
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
169
  else:
170
  print("❌ No GPU available!")
171
  return "错误:GPU 不可用", "", 0
 
229
  print(status)
230
  return status, markdown, elapsed
231
  else:
 
232
  for root, dirs, files in os.walk(output_dir):
233
  for f in files:
234
  if f.endswith('.md'):
235
+ with open(os.path.join(root, f), "r", encoding="utf-8") as file:
 
236
  markdown = file.read()
237
  return f"✅ 解析成功!耗时 {elapsed:.1f} 秒", markdown, elapsed
238
  return f"❌ 解析失败:未找到输出文件", "", elapsed
 
247
 
248
 
249
  # Gradio 界面
250
+ with gr.Blocks(title="MinerU PDF 解析器 (ZeroGPU)", theme=gr.themes.Soft()) as demo:
251
  gr.Markdown("""
252
  # 📄 MinerU PDF 解析器
253
+ ### 🚀 Powered by HuggingFace ZeroGPU (H200 Slice)
254
 
255
+ 将 PDF/图片转换为 Markdown,支持表格、公式识别。
256
  """)
257
 
258
  with gr.Row():
 
277
  ("中文", "ch"),
278
  ("英文", "en"),
279
  ("自动检测", "auto"),
 
 
 
280
  ],
281
  value="ch",
282
  label="文档语言",
283
  )
284
 
285
+ max_pages = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="最大页数")
286
 
287
  with gr.Row():
288
  table_enable = gr.Checkbox(value=True, label="表格识别")
 
303
 
304
  gr.Markdown("""
305
  ---
306
+ ### ⚠️ 说明
307
+ - H200 MIG 分区可能存在 CUBLAS 兼容性问题
308
+ - 如果解析失败,会自动回退到 CPU 计算(较慢但稳定)
309
+ - 建议先用 1-3 页测试
 
 
 
 
310
  """)
311
 
312
  if __name__ == "__main__":