GiantPandas committed on
Commit
22afd03
·
verified ·
1 Parent(s): f7f88c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -387
app.py CHANGED
@@ -1,406 +1,93 @@
1
- import os
2
- import re
3
- import sys
4
- import json
5
- import time
6
- import copy
7
  import base64
8
- import asyncio
9
- import tempfile
10
- import subprocess
11
- from pathlib import Path
12
- from datetime import datetime
13
- import zipfile
14
- import httpx, aiofiles, os, asyncio
15
- import numpy as np
16
  import gradio as gr
17
- from PIL import Image
18
- from pdf2image import convert_from_path
19
- from loguru import logger
20
- from openai import OpenAI, AsyncOpenAI
21
- from gradio_pdf import PDF
22
 
23
- import aiohttp
24
- import uuid
25
- import tqdm
26
 
27
- import requests
28
-
29
-
30
- def setup_poppler_linux():
31
- poppler_dir = "/tmp/poppler"
32
- if not os.path.exists(poppler_dir):
33
- os.makedirs(poppler_dir, exist_ok=True)
34
- subprocess.run([
35
- "apt-get", "update"
36
- ], check=True)
37
- subprocess.run([
38
- "apt-get", "install", "-y", "poppler-utils"
39
- ], check=True)
40
-
41
- setup_poppler_linux()
42
-
43
-
44
-
45
- preset_prompts = [
46
- "Please convert the document into Markdown format.",
47
- "Generate a clean and structured Markdown version of the document.",
48
- "Transform this content into Markdown with proper headings and bullet points.",
49
- "Convert the text to Markdown, preserving structure and formatting.",
50
- "Reformat this document as Markdown with clear sections and lists.",
51
- ]
52
-
53
-
54
- def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
55
- url = f"{openai_api_base}{route}"
56
- headers = {}
57
- if api_key:
58
- headers["Authorization"] = f"Bearer {api_key}"
59
-
60
- with open(file_path, "rb") as f:
61
- files = {"file": (os.path.basename(file_path), f, "application/pdf")}
62
- response = requests.post(url, files=files, headers=headers)
63
- return response
64
-
65
-
66
-
67
-
68
- async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
69
- """使用aiohttp异步发送PDF"""
70
- url = f"{server_ip}{route}"
71
- headers = {}
72
- if Authorization:
73
- headers["Authorization"] = f"Bearer {Authorization}"
74
-
75
- try:
76
- async with aiohttp.ClientSession() as session:
77
- with open(file_path, "rb") as f:
78
- data = aiohttp.FormData()
79
- data.add_field('file', f, filename=os.path.basename(file_path), content_type='application/pdf')
80
- async with session.post(url, data=data, headers=headers) as response:
81
- print(f"PDF发送成功: {file_path}, 状态码: {response.status}")
82
- return response
83
- except Exception as e:
84
- print(f"PDF发送失败: {file_path}, 错误: {e}")
85
- return None
86
-
87
-
88
- def extract_makrdown(text):
89
- m = re.search(r'```markdown\s*([\s\S]*?)```', text, re.MULTILINE)
90
- if m:
91
- return m.group(1).strip()
92
- else:
93
- return text
94
-
95
- openai_api_key = "EMPTY"
96
-
97
- openai_api_base = os.environ.get("openai_api_base")
98
-
99
- IP = os.environ.get("IP")
100
-
101
- PORT = os.environ.get("PORT")
102
-
103
- Authorization = os.environ.get("Authorization")
104
-
105
- client = AsyncOpenAI(
106
- api_key=openai_api_key,
107
- base_url=openai_api_base + "/v1",
108
- )
109
-
110
-
111
- async def request(messages):
112
-
113
- chat_completion_from_base64 = await client.chat.completions.create(
114
- messages=messages,
115
- extra_headers={
116
- "Authorization": f"Bearer {Authorization}"
117
- },
118
- model="Qwen2_5VL",
119
- max_completion_tokens=4096,
120
- stream=True,
121
- temperature=0.0,
122
- top_p=0.95
123
  )
124
-
125
- page = ""
126
- async for chunk in chat_completion_from_base64:
127
- if chunk.choices[0].delta.content:
128
- content = chunk.choices[0].delta.content
129
-
130
- choice = chunk.choices[0]
131
- if choice.finish_reason is not None:
132
- print(f"end reason = {choice.finish_reason}")
133
- break
134
- page += content
135
-
136
- yield content
137
-
138
-
139
- def images_to_pdf(img_paths, pdf_path):
140
-
141
- if isinstance(img_paths, (str, Path)):
142
- img_paths = [img_paths]
143
-
144
- if not img_paths:
145
- raise ValueError("img_paths is empty")
146
- images = []
147
- for p in img_paths:
148
- p = Path(p)
149
- if not p.is_file():
150
- raise FileNotFoundError(p)
151
-
152
- img = Image.open(p)
153
- if img.mode in ("RGBA", "P"):
154
- img = img.convert("RGB")
155
- images.append(img)
156
 
157
- pdf_path = Path(pdf_path)
158
- pdf_path.parent.mkdir(parents=True, exist_ok=True)
159
- images[0].save(pdf_path,
160
- save_all=True,
161
- append_images=images[1:],
162
- resolution=300.0)
163
- return pdf_path
164
 
 
 
165
 
166
- def encode_image(image_path):
167
- with open(image_path, "rb") as image_file:
168
- return base64.b64encode(image_file.read()).decode("utf-8")
169
-
170
- def build_message(image_path, prompt):
171
-
172
- content = [
173
- {
174
- "type": "image_url",
175
- "image_url": {
176
- "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
177
- }
178
- },
179
- {"type": "text", 'text': prompt}
180
- ]
181
-
182
-
183
- messages = [
184
- {"role": "system", "content": "You are a helpful assistant."},
185
- {'role': 'user', 'content': content}
186
-
187
- ]
188
-
189
- return messages
190
-
191
-
192
-
193
- def download_markdown_file(md_text):
194
- filename = f"markdown_{uuid.uuid4().hex[:8]}.md"
195
- filepath = Path("downloads") / filename
196
- filepath.parent.mkdir(exist_ok=True)
197
- with open(filepath, "w", encoding="utf-8") as f:
198
- f.write(md_text)
199
- return str(filepath)
200
-
201
-
202
- async def doc_parser(doc_path, prompt):
203
-
204
- doc_path = Path(doc_path)
205
- if not doc_path.is_file():
206
- raise FileNotFoundError(doc_path)
207
-
208
- with tempfile.TemporaryDirectory() as tmpdir:
209
- tmpdir = Path(tmpdir)
210
-
211
- queries = []
212
- if doc_path.suffix.lower() == ".pdf":
213
- pages: List[Image.Image] = convert_from_path(doc_path, dpi=300)
214
- for idx, page in enumerate(pages, start=1):
215
- img_path = tmpdir / f"page_{idx}.png"
216
- page.save(img_path, "PNG")
217
-
218
- messages = build_message(img_path, prompt)
219
- queries.append(messages)
220
-
221
- else:
222
- messages = build_message(doc_path, prompt)
223
- queries.append(messages)
224
-
225
- all_pages = []
226
- all_pages_raw = []
227
- for query in queries:
228
- pages = ""
229
- async for chunk in request(query):
230
- pages += chunk
231
- yield extract_makrdown(pages), pages
232
- all_pages.append(extract_makrdown(pages))
233
- all_pages_raw.append(pages)
234
- print(all_pages)
235
- yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
236
-
237
-
238
- def compress_directory_to_zip(directory_path, output_zip_path):
239
- try:
240
- with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
241
-
242
-
243
- for root, dirs, files in os.walk(directory_path):
244
- for file in files:
245
-
246
- file_path = os.path.join(root, file)
247
-
248
- arcname = os.path.relpath(file_path, directory_path)
249
-
250
- zipf.write(file_path, arcname)
251
- return 0
252
- except Exception as e:
253
- logger.exception(e)
254
- return -1
255
-
256
- latex_delimiters = [
257
- {'left': '$$', 'right': '$$', 'display': True},
258
- {'left': '$', 'right': '$', 'display': False},
259
- {'left': '\\(', 'right': '\\)', 'display': False},
260
- {'left': '\\[', 'right': '\\]', 'display': True},
261
- ]
262
-
263
- def check_prompt(prompt):
264
- if not prompt or prompt.strip() == "":
265
- raise gr.Error("Please select or enter a prompt before parsing.")
266
- return prompt
267
-
268
- def to_file(image_path):
269
-
270
- if image_path.endswith("Academic_Papers.png"):
271
- image_path = image_path.replace("Academic_Papers.png", "Academic_Papers.pdf")
272
-
273
- return image_path
274
-
275
-
276
- # async def process_file(file_path):
277
- # if not file_path.endswith(".pdf"):
278
- # tmp_path = Path(file_path).with_suffix(".pdf")
279
- # images_to_pdf(file_path, tmp_path)
280
- # else:
281
- # tmp_path = Path(file_path)
282
-
283
- # async with httpx.AsyncClient() as client:
284
- # await send_pdf_to_parse_async(client, str(tmp_path), IP, PORT)
285
- # return str(tmp_path)
286
-
287
-
288
- async def process_file(file_path):
289
- """使用asyncio的异步方案"""
290
- if file_path is None:
291
- return None
292
-
293
- if not file_path.endswith(".pdf"):
294
- tmp_file_path = Path(file_path)
295
- tmp_file_path = tmp_file_path.with_suffix(".pdf")
296
- images_to_pdf(file_path, tmp_file_path)
297
- else:
298
- tmp_file_path = file_path
299
- asyncio.create_task(send_pdf_async_aiohttp(tmp_file_path, server_ip=openai_api_base, Authorization=Authorization))
300
 
301
- return str(tmp_file_path)
 
 
 
 
302
 
 
 
303
 
304
- if __name__ == '__main__':
305
- with gr.Blocks() as demo:
306
- with gr.Row():
307
- with gr.Column(variant='panel', scale=5):
308
-
309
- file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
310
- prompts = gr.Dropdown(
311
- choices=preset_prompts,
312
- label="Prompt",
313
- info="Enter or select prompts...",
314
- value=preset_prompts[0],
315
- multiselect=False,
316
- interactive=True,
317
- allow_custom_value=True,
318
- )
319
 
320
- with gr.Row():
321
- change_bu = gr.Button('Parse')
322
- clear_bu = gr.ClearButton(value='Clear')
323
- pdf_show = PDF(label='Preview', interactive=False, visible=True, height=800)
324
 
 
 
 
 
 
325
 
326
-
327
- example_root = os.path.join(os.path.dirname(__file__), 'examples')
328
- images = [
329
- os.path.join(example_root, f)
330
- for f in os.listdir(example_root)
331
- if f.lower().endswith(('png', 'jpg', 'jpeg'))
332
- ]
333
 
334
- with gr.Column(variant='panel', scale=5):
335
- with gr.Accordion("Examples", open=True):
336
- example_root = "examples"
337
- file_path = [
338
- os.path.join(example_root, f)
339
- for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
340
-
341
- ]
342
-
343
- with gr.Row():
344
- for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
345
- with gr.Column(scale=1, min_width=120):
346
- gr.Image(
347
- value=file_path[i],
348
- width=120,
349
- height=90,
350
- show_label=False,
351
- show_download_button=False
352
- )
353
- gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
354
-
355
-
356
- download_btn = gr.Button("⬇️ Generate download link", size="sm")
357
- output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
358
-
359
- gr.HTML("""
360
- <style>
361
- #down-file-box {
362
- max-height: 300px;
363
- }
364
- </style>
365
- """)
366
- with gr.Tabs():
367
- with gr.Tab('Markdown rendering'):
368
- md = gr.Markdown(label='Markdown rendering', height=1100, show_copy_button=True,
369
- latex_delimiters=latex_delimiters,
370
- line_breaks=True)
371
- with gr.Tab('Markdown text'):
372
- md_text = gr.TextArea(lines=45, show_copy_button=True)
373
-
374
-
375
-
376
- file.change(fn=process_file, inputs=file, outputs=pdf_show)
377
 
 
 
 
378
 
379
- change_bu.click(
380
- fn=check_prompt,
381
- inputs=prompts,
382
- outputs=prompts
383
- ).then(
384
- lambda f: gr.update(visible=False),
385
- inputs=output_file,
386
- outputs=output_file
387
- ).then(
388
- fn=doc_parser,
389
- inputs=[file, prompts],
390
- outputs=[md, md_text]
391
- )
392
-
393
- clear_bu.add([file, md, pdf_show, md_text])
394
-
395
- download_btn.click(
396
- fn=download_markdown_file,
397
- inputs=md_text,
398
- outputs=output_file
399
- ).then(
400
- lambda f: gr.update(visible=True),
401
- inputs=output_file,
402
- outputs=output_file
403
- )
404
 
 
 
 
 
 
 
405
 
406
- demo.launch(server_name='0.0.0.0',share=True)
 
1
+ # gallery_viewer.py
 
 
 
 
 
2
  import base64
3
+ import pathlib
 
 
 
 
 
 
 
4
  import gradio as gr
 
 
 
 
 
5
 
 
 
 
6
 
7
def files_to_b64(files):
    """Convert uploaded image files into a list of base64 ``data:`` URLs.

    The URLs are kept in state so the front end can switch between images
    without re-reading the files.

    Args:
        files: The value delivered by the upload component. May be ``None``
            or empty (nothing uploaded / upload cleared), a single item, or
            a list of items. Each item is either a path (``str`` or
            ``pathlib`` path) or an object exposing the file path as
            ``.name``.

    Returns:
        A list of ``data:image/<subtype>;base64,<payload>`` strings, one per
        input file, in input order. Empty when there is nothing to convert.
    """
    if not files:
        return []
    # Tolerate a single upload that was not wrapped in a list.
    if isinstance(files, (str, pathlib.PurePath)) or hasattr(files, "name"):
        files = [files]

    b64_list = []
    for f in files:
        # Path-like items are used directly; wrapper objects carry the path
        # in ``.name``. (``str`` must be checked first: a pathlib path's
        # own ``.name`` is only the base name.)
        if isinstance(f, (str, pathlib.PurePath)):
            path = pathlib.Path(f)
        else:
            path = pathlib.Path(f.name)

        # Read the bytes from disk rather than through the upload object:
        # path-string uploads have no ``read()`` method at all.
        data = base64.b64encode(path.read_bytes()).decode()

        # The registered media type for .jpg files is image/jpeg.
        suffix = path.suffix[1:].lower()
        subtype = "jpeg" if suffix in ("jpg", "jpe") else suffix
        b64_list.append(f"data:image/{subtype};base64,{data}")
    return b64_list
15
+
16
+
17
def render_img(b64_list, idx, scale):
    """Render the HTML for the image at position ``idx`` zoomed by ``scale``.

    Args:
        b64_list: List of image ``data:`` URLs; may be empty or ``None``.
        idx: Requested index. Any integer is accepted and wrapped into range
            with modulo, so stepping past either end cycles around.
        scale: Zoom factor inserted into the CSS ``transform: scale(...)``.

    Returns:
        An HTML string: a placeholder paragraph when there are no images,
        otherwise a scrollable ``<div>`` containing the selected ``<img>``.
    """
    import html

    if not b64_list:
        return "<p style='color:gray'>请先上传图片</p>"

    idx %= len(b64_list)  # wrap around instead of indexing out of range
    # Escape the URL before placing it in an attribute: part of it is derived
    # from the uploaded file's name. Base64 data URLs pass through unchanged.
    src = html.escape(str(b64_list[idx]), quote=True)
    return (
        f'<div style="overflow:auto;border:1px solid #ccc;'
        f'width:100%;height:800px;text-align:center">'
        f'<img src="{src}" '
        f'style="transform:scale({scale});transform-origin:0 0;" />'
        f'</div>'
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
 
 
 
 
 
 
 
31
 
32
# Multi-image viewer UI: upload several images, page through them with the
# previous/next buttons and zoom with the slider.
with gr.Blocks(title="多图预览 + 缩放 + 翻页") as demo:
    gr.Markdown("## 🖼️ 多张图片翻页预览 Demo")

    # Upload widget (several images at once).
    uploader = gr.File(
        file_types=[".png", ".jpg", ".jpeg"],
        file_count="multiple",
        label="上传多张图片",
    )

    # Zoom and paging controls.
    with gr.Row():
        prev_btn = gr.Button("⬅️ 上一张")
        next_btn = gr.Button("下一张 ➡️")
        zoom = gr.Slider(0.5, 3, value=1, step=0.1, label="缩放倍数")

    # HTML container that displays the current image.
    viewer = gr.HTML()

    # ---- State ----
    img_list_state = gr.State([])  # list of base64 data URLs
    idx_state = gr.State(0)        # index of the image currently shown

    # ---- Event logic ----
    def upload_handler(files, scale):
        """Encode a new upload, reset to the first image and render it."""
        # The change event also fires when the upload is cleared, in which
        # case there is nothing to encode.
        b64s = files_to_b64(files) if files else []
        # Render at the slider's current zoom so the view matches the control.
        return b64s, 0, render_img(b64s, 0, scale)

    uploader.change(
        upload_handler,
        inputs=[uploader, zoom],
        outputs=[img_list_state, idx_state, viewer],
    )

    def _step(b64s, idx, scale, delta):
        """Move ``delta`` images from ``idx`` (cyclically) and render."""
        # Store the wrapped index so the state value stays within range.
        idx = (idx + delta) % len(b64s) if b64s else 0
        return idx, render_img(b64s, idx, scale)

    def show_prev(b64s, idx, scale):
        """Show the previous image."""
        return _step(b64s, idx, scale, -1)

    def show_next(b64s, idx, scale):
        """Show the next image."""
        return _step(b64s, idx, scale, 1)

    prev_btn.click(
        show_prev,
        inputs=[img_list_state, idx_state, zoom],
        outputs=[idx_state, viewer],
    )

    next_btn.click(
        show_next,
        inputs=[img_list_state, idx_state, zoom],
        outputs=[idx_state, viewer],
    )

    # Re-render the current image whenever the zoom slider moves.
    zoom.change(
        render_img,
        inputs=[img_list_state, idx_state, zoom],
        outputs=viewer,
    )

demo.launch(share=True)