prithivMLmods committed on
Commit a87d2db · verified · 1 Parent(s): 5f58462

Update app.py

Files changed (1)
  1. app.py +583 -523
app.py CHANGED
@@ -1,539 +1,599 @@
  import os
- import json
- import copy
- import math
  import time
- import random
- import logging
- import numpy as np
- from typing import Any, Dict, List, Optional, Union
- import torch
- from PIL import Image
  import gradio as gr
- import spaces
- from diffusers import (
-     DiffusionPipeline,
-     FlowMatchEulerDiscreteScheduler)
- from huggingface_hub import (
-     hf_hub_download,
-     HfFileSystem,
-     ModelCard,
-     snapshot_download)
- from diffusers.utils import load_image
- import requests
- from urllib.parse import urlparse
- import tempfile
- import shutil
- import uuid
- import zipfile
-
-
- # META: CUDA_CHECK / GPU_INFO
- device = "cuda" if torch.cuda.is_available() else "cpu"
- print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
- print("torch.__version__ =", torch.__version__)
- print("torch.version.cuda =", torch.version.cuda)
- print("cuda available:", torch.cuda.is_available())
- print("cuda device count:", torch.cuda.device_count())
- if torch.cuda.is_available():
-     print("current device:", torch.cuda.current_device())
-     print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
-
- print("Using device:", device)
-
- loras = [
-     # Sample Qwen-compatible LoRAs
-     {
-         "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Studio-Realism/resolve/main/images/2.png",
-         "title": "Studio Realism",
-         "repo": "prithivMLmods/Qwen-Image-Studio-Realism",
-         "weights": "qwen-studio-realism.safetensors",
-         "trigger_word": "Studio Realism"
-     },
-     {
-         "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Sketch-Smudge/resolve/main/images/1.png",
-         "title": "Sketch Smudge",
-         "repo": "prithivMLmods/Qwen-Image-Sketch-Smudge",
-         "weights": "qwen-sketch-smudge.safetensors",
-         "trigger_word": "Sketch Smudge"
-     },
-     {
-         "image": "https://huggingface.co/Shakker-Labs/AWPortrait-QW/resolve/main/images/08fdaf6b644b61136340d5c908ca37993e47f34cdbe2e8e8251c4c72.jpg",
-         "title": "AWPortrait QW",
-         "repo": "Shakker-Labs/AWPortrait-QW",
-         "weights": "AWPortrait-QW_1.0.safetensors",
-         "trigger_word": "Portrait"
-     },
-     {
-         "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Anime-LoRA/resolve/main/images/1.png",
-         "title": "Qwen Anime",
-         "repo": "prithivMLmods/Qwen-Image-Anime-LoRA",
-         "weights": "qwen-anime.safetensors",
-         "trigger_word": "Qwen Anime"
-     },
-     {
-         "image": "https://huggingface.co/flymy-ai/qwen-image-realism-lora/resolve/main/assets/flymy_realism.png",
-         "title": "Image Realism",
-         "repo": "flymy-ai/qwen-image-realism-lora",
-         "weights": "flymy_realism.safetensors",
-         "trigger_word": "Super Realism Portrait"
-     },
-     {
-         "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Fragmented-Portraiture/resolve/main/images/3.png",
-         "title": "Fragmented Portraiture",
-         "repo": "prithivMLmods/Qwen-Image-Fragmented-Portraiture",
-         "weights": "qwen-fragmented-portraiture.safetensors",
-         "trigger_word": "Fragmented Portraiture"
-     },
-     {
-         "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Synthetic-Face/resolve/main/images/2.png",
-         "title": "Synthetic Face",
-         "repo": "prithivMLmods/Qwen-Image-Synthetic-Face",
-         "weights": "qwen-synthetic-face.safetensors",
-         "trigger_word": "Synthetic Face"
-     },
-     {
-         "image": "https://huggingface.co/itspoidaman/qwenglitch/resolve/main/images/GyZTwJIbkAAhS4h.jpeg",
-         "title": "Qwen Glitch",
-         "repo": "itspoidaman/qwenglitch",
-         "weights": "qwenglitch1.safetensors",
-         "trigger_word": "qwenglitch"
-     },
-     {
-         "image": "https://huggingface.co/alfredplpl/qwen-image-modern-anime-lora/resolve/main/sample1.jpg",
-         "title": "Modern Anime Lora",
-         "repo": "alfredplpl/qwen-image-modern-anime-lora",
-         "weights": "lora.safetensors",
-         "trigger_word": "Japanese modern anime style"
-     },
  ]

- # Initialize the base model
- dtype = torch.bfloat16
- base_model = "Qwen/Qwen-Image"
-
- # Scheduler configuration from the Qwen-Image-Lightning repository
- scheduler_config = {
-     "base_image_seq_len": 256,
-     "base_shift": math.log(3),
-     "invert_sigmas": False,
-     "max_image_seq_len": 8192,
-     "max_shift": math.log(3),
-     "num_train_timesteps": 1000,
-     "shift": 1.0,
-     "shift_terminal": None,
-     "stochastic_sampling": False,
-     "time_shift_type": "exponential",
-     "use_beta_sigmas": False,
-     "use_dynamic_shifting": True,
-     "use_exponential_sigmas": False,
-     "use_karras_sigmas": False,
- }
-
- scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
- pipe = DiffusionPipeline.from_pretrained(
-     base_model, scheduler=scheduler, torch_dtype=dtype
- ).to(device)
-
- # Lightning LoRA info (no global state)
- LIGHTNING_LORA_REPO = "lightx2v/Qwen-Image-Lightning"
- LIGHTNING_LORA_WEIGHT = "Qwen-Image-Lightning-8steps-V1.0.safetensors"
-
- MAX_SEED = np.iinfo(np.int32).max
-
- class Timer:
-     def __init__(self, task_name=""):
-         self.task_name = task_name
-
-     def __enter__(self):
-         self.start_time = time.time()
-         return self
-
-     def __exit__(self, exc_type, exc_value, traceback):
-         self.end_time = time.time()
-         self.elapsed_time = self.end_time - self.start_time
-         if self.task_name:
-             print(f"Elapsed time for {self.task_name}: {self.elapsed_time:.6f} seconds")
          else:
-             print(f"Elapsed time: {self.elapsed_time:.6f} seconds")
-
- def compute_image_dimensions(aspect_ratio):
-     """Converts aspect ratio string to width, height tuple."""
-     if aspect_ratio == "1:1":
-         return 1024, 1024
-     elif aspect_ratio == "16:9":
-         return 1152, 640
-     elif aspect_ratio == "9:16":
-         return 640, 1152
-     elif aspect_ratio == "4:3":
-         return 1024, 768
-     elif aspect_ratio == "3:4":
-         return 768, 1024
-     elif aspect_ratio == "3:2":
-         return 1024, 688
-     elif aspect_ratio == "2:3":
-         return 688, 1024
      else:
-         return 1024, 1024
-
- def handle_lora_selection(evt: gr.SelectData, aspect_ratio):
-     selected_lora = loras[evt.index]
-     new_placeholder = f"Type a prompt for {selected_lora['title']}"
-     lora_repo = selected_lora["repo"]
-     updated_text = f"### Selected: [{lora_repo}](https://huggingface.co/{lora_repo}) ✅"
-
-     # Update aspect ratio if specified in LoRA config
-     if "aspect" in selected_lora:
-         if selected_lora["aspect"] == "portrait":
-             aspect_ratio = "9:16"
-         elif selected_lora["aspect"] == "landscape":
-             aspect_ratio = "16:9"
-         else:
-             aspect_ratio = "1:1"
-
-     return (
-         gr.update(placeholder=new_placeholder),
-         updated_text,
-         evt.index,
-         aspect_ratio,
-     )
-
- def adjust_generation_mode(speed_mode):
-     """Update UI based on speed/quality toggle."""
-     if speed_mode == "Fast (8 steps)":
-         return gr.update(value="Fast mode selected - 8 steps with Lightning LoRA"), 8, 1.0
-     else:
-         return gr.update(value="Base mode selected - 50 steps for best quality"), 50, 4.0
-
- @spaces.GPU(duration=100)
- def create_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, negative_prompt=""):
-     pipe.to("cuda")
-     generator = torch.Generator(device="cuda").manual_seed(seed)
-
-     with Timer("Generating image"):
-         # Generate image
-         image = pipe(
-             prompt=prompt_mash,
-             negative_prompt=negative_prompt,
-             num_inference_steps=steps,
-             true_cfg_scale=cfg_scale, # Use true_cfg_scale for Qwen-Image
-             width=width,
-             height=height,
-             generator=generator,
-         ).images[0]
-
-     return image
-
- @spaces.GPU(duration=100)
- def process_adapter_generation(prompt, cfg_scale, steps, selected_index, randomize_seed, seed, aspect_ratio, lora_scale, speed_mode, progress=gr.Progress(track_tqdm=True)):
-     if selected_index is None:
-         raise gr.Error("You must select a LoRA before proceeding.")
-
-     selected_lora = loras[selected_index]
-     lora_path = selected_lora["repo"]
-     trigger_word = selected_lora["trigger_word"]
-
-     # Prepare prompt with trigger word
-     if trigger_word:
-         if "trigger_position" in selected_lora:
-             if selected_lora["trigger_position"] == "prepend":
-                 prompt_mash = f"{trigger_word} {prompt}"
              else:
-                 prompt_mash = f"{prompt} {trigger_word}"
-         else:
-             prompt_mash = f"{trigger_word} {prompt}"
      else:
-         prompt_mash = prompt
-
-     # Always unload any existing LoRAs first to avoid conflicts
-     with Timer("Unloading existing LoRAs"):
-         pipe.unload_lora_weights()
-
-     # Load LoRAs based on speed mode
-     if speed_mode == "Fast (8 steps)":
-         with Timer("Loading Lightning LoRA and style LoRA"):
-             # Load Lightning LoRA first
-             pipe.load_lora_weights(
-                 LIGHTNING_LORA_REPO,
-                 weight_name=LIGHTNING_LORA_WEIGHT,
-                 adapter_name="lightning"
-             )
-
-             # Load the selected style LoRA
-             weight_name = selected_lora.get("weights", None)
-             pipe.load_lora_weights(
-                 lora_path,
-                 weight_name=weight_name,
-                 low_cpu_mem_usage=True,
-                 adapter_name="style"
-             )
-
-             # Set both adapters active with their weights
-             pipe.set_adapters(["lightning", "style"], adapter_weights=[1.0, lora_scale])
-     else:
-         # Quality mode - only load the style LoRA
-         with Timer(f"Loading LoRA weights for {selected_lora['title']}"):
-             weight_name = selected_lora.get("weights", None)
-             pipe.load_lora_weights(
-                 lora_path,
-                 weight_name=weight_name,
-                 low_cpu_mem_usage=True,
-                 adapter_name="style"
-             )
-             pipe.set_adapters(["style"], adapter_weights=[lora_scale])
-
-     # Set random seed for reproducibility
-     with Timer("Randomizing seed"):
-         if randomize_seed:
-             seed = random.randint(0, MAX_SEED)
-
-     # Get image dimensions from aspect ratio
-     width, height = compute_image_dimensions(aspect_ratio)
-
-     # Generate the image
-     final_image = create_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale)
-
-     return final_image, seed
-
- def fetch_hf_adapter_files(link):
-     split_link = link.split("/")
-     if len(split_link) != 2:
-         raise Exception("Invalid Hugging Face repository link format.")
-
-     print(f"Repository attempted: {split_link}")
-
-     # Load model card
-     model_card = ModelCard.load(link)
-     base_model = model_card.data.get("base_model")
-     print(f"Base model: {base_model}")
-
-     # Validate model type (for Qwen-Image)
-     acceptable_models = {"Qwen/Qwen-Image"}
-
-     models_to_check = base_model if isinstance(base_model, list) else [base_model]
-
-     if not any(model in acceptable_models for model in models_to_check):
-         raise Exception("Not a Qwen-Image LoRA!")
-
-     # Extract image and trigger word
-     image_path = model_card.data.get("widget", [{}])[0].get("output", {}).get("url", None)
-     trigger_word = model_card.data.get("instance_prompt", "")
-     image_url = f"https://huggingface.co/{link}/resolve/main/{image_path}" if image_path else None
-
-     # Initialize Hugging Face file system
-     fs = HfFileSystem()
      try:
-         list_of_files = fs.ls(link, detail=False)
-
-         # Find safetensors file
-         safetensors_name = None
-         for file in list_of_files:
-             filename = file.split("/")[-1]
-             if filename.endswith(".safetensors"):
-                 safetensors_name = filename
-                 break
-
-         if not safetensors_name:
-             raise Exception("No valid *.safetensors file found in the repository.")
-
      except Exception as e:
-         print(e)
-         raise Exception("You didn't include a valid Hugging Face repository with a *.safetensors LoRA")
-
-     return split_link[1], link, safetensors_name, trigger_word, image_url
-
- def validate_custom_adapter(link):
-     print(f"Checking a custom model on: {link}")
-
-     if link.endswith('.safetensors'):
-         if 'huggingface.co' in link:
-             parts = link.split('/')
-             try:
-                 hf_index = parts.index('huggingface.co')
-                 username = parts[hf_index + 1]
-                 repo_name = parts[hf_index + 2]
-                 repo = f"{username}/{repo_name}"
-
-                 safetensors_name = parts[-1]
-
-                 try:
-                     model_card = ModelCard.load(repo)
-                     trigger_word = model_card.data.get("instance_prompt", "")
-                     image_path = model_card.data.get("widget", [{}])[0].get("output", {}).get("url", None)
-                     image_url = f"https://huggingface.co/{repo}/resolve/main/{image_path}" if image_path else None
-                 except:
-                     trigger_word = ""
-                     image_url = None
-
-                 return repo_name, repo, safetensors_name, trigger_word, image_url
-             except:
-                 raise Exception("Invalid safetensors URL format")
-
-     if link.startswith("https://"):
-         if link.startswith("https://huggingface.co") or link.startswith("https://www.huggingface.co"):
-             link_split = link.split("huggingface.co/")
-             return fetch_hf_adapter_files(link_split[1])
-         else:
-             return fetch_hf_adapter_files(link)
-
- def incorporate_custom_adapter(custom_lora):
-     global loras
-     if custom_lora:
          try:
-             title, repo, path, trigger_word, image = validate_custom_adapter(custom_lora)
-             print(f"Loaded custom LoRA: {repo}")
-             card = f'''
-             <div class="custom_lora_card">
-                 <span>Loaded custom LoRA:</span>
-                 <div class="card_internal">
-                     <img src="{image}" />
-                     <div>
-                         <h3>{title}</h3>
-                         <small>{"Using: <code><b>"+trigger_word+"</code></b> as the trigger word" if trigger_word else "No trigger word found. If there's a trigger word, include it in your prompt"}<br></small>
-                     </div>
-                 </div>
-             </div>
-             '''
-             existing_item_index = next((index for (index, item) in enumerate(loras) if item['repo'] == repo), None)
-             if existing_item_index is None:
-                 new_item = {
-                     "image": image,
-                     "title": title,
-                     "repo": repo,
-                     "weights": path,
-                     "trigger_word": trigger_word
-                 }
-                 print(new_item)
-                 loras.append(new_item)
-                 existing_item_index = len(loras) - 1 # Get the actual index after adding
-
-             return gr.update(visible=True, value=card), gr.update(visible=True), gr.Gallery(selected_index=None), f"Custom: {path}", existing_item_index, trigger_word
          except Exception as e:
-             gr.Warning(f"Invalid LoRA: either you entered an invalid link, or a non-Qwen-Image LoRA, this was the issue: {e}")
-             return gr.update(visible=True, value=f"Invalid LoRA: either you entered an invalid link, a non-Qwen-Image LoRA"), gr.update(visible=True), gr.update(), "", None, ""
      else:
-         return gr.update(visible=False), gr.update(visible=False), gr.update(), "", None, ""
-
- def discard_custom_adapter():
-     return gr.update(visible=False), gr.update(visible=False), gr.update(), "", None, ""
-
- process_adapter_generation.zerogpu = True
-
- css = '''
- #gen_btn{height: 100%}
- #gen_column{align-self: stretch}
- #title{text-align: center}
- #title h1{font-size: 3em; display:inline-flex; align-items:center}
- #title img{width: 100px; margin-right: 0.5em}
- #gallery .grid-wrap{height: 10vh}
- #lora_list{background: var(--block-background-fill);padding: 0 1em .3em; font-size: 90%}
- .card_internal{display: flex;height: 100px;margin-top: .5em}
- .card_internal img{margin-right: 1em}
- .styler{--form-gap-width: 0px !important}
- #speed_status{padding: .5em; border-radius: 5px; margin: 1em 0}
- '''
-
- with gr.Blocks(theme="bethecloud/storj_theme", css=css, delete_cache=(240, 240)) as app:
-     title = gr.HTML("""<h1>Qwen Image LoRA DLC⛵</h1>""", elem_id="title")
-     selected_index = gr.State(None)
-
-     with gr.Row():
-         with gr.Column(scale=3):
-             prompt = gr.Textbox(label="Prompt", lines=1, placeholder="✦︎ Choose the LoRA and type the prompt")
-         with gr.Column(scale=1, elem_id="gen_column"):
-             generate_button = gr.Button("Generate", variant="primary", elem_id="gen_btn")
-
-     with gr.Row():
-         with gr.Column():
-             selected_info = gr.Markdown("")
-             gallery = gr.Gallery(
-                 [(item["image"], item["title"]) for item in loras],
-                 label="LoRA Gallery",
-                 allow_preview=False,
-                 columns=3,
-                 elem_id="gallery",
-                 show_share_button=False
-             )
-             with gr.Group():
-                 custom_lora = gr.Textbox(label="Custom LoRA", placeholder="username/lora-model-name")
-                 gr.Markdown("[Check Qwen-Image LoRAs](https://huggingface.co/models?other=base_model:adapter:Qwen/Qwen-Image)", elem_id="lora_list")
-             custom_lora_info = gr.HTML(visible=False)
-             custom_lora_button = gr.Button("Remove custom LoRA", visible=False)
-
-         with gr.Column():
-             result = gr.Image(label="Generated Image", format="png")
-
-             with gr.Row():
-                 aspect_ratio = gr.Dropdown(
-                     label="Aspect Ratio",
-                     choices=["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"],
-                     value="3:2"
-                 )
-             with gr.Row():
-                 speed_mode = gr.Dropdown(
-                     label="Output Mode",
-                     choices=["Fast (8 steps)", "Base (50 steps)"],
-                     value="Base (50 steps)",
-                 )
-
-             speed_status = gr.Markdown("Base mode selected - 50 steps for best quality", elem_id="speed_status")
-
-     with gr.Row():
-         with gr.Accordion("Advanced Settings", open=False):
-             with gr.Column():
                  with gr.Row():
-                     cfg_scale = gr.Slider(
-                         label="Guidance Scale (True CFG)",
-                         minimum=1.0,
-                         maximum=5.0,
-                         step=0.1,
-                         value=4.0,
-                         info="Lower for speed mode, higher for quality"
-                     )
-                     steps = gr.Slider(
-                         label="Steps",
-                         minimum=4,
-                         maximum=50,
-                         step=1,
-                         value=50,
-                         info="Automatically set by speed mode"
-                     )
-
                  with gr.Row():
-                     randomize_seed = gr.Checkbox(True, label="Randomize seed")
-                     seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, randomize=True)
-                     lora_scale = gr.Slider(label="LoRA Scale", minimum=0, maximum=2, step=0.01, value=1.0)
-
-     # Event handlers
-     gallery.select(
-         handle_lora_selection,
-         inputs=[aspect_ratio],
-         outputs=[prompt, selected_info, selected_index, aspect_ratio]
-     )
-
-     speed_mode.change(
-         adjust_generation_mode,
-         inputs=[speed_mode],
-         outputs=[speed_status, steps, cfg_scale]
-     )
-
-     custom_lora.input(
-         incorporate_custom_adapter,
-         inputs=[custom_lora],
-         outputs=[custom_lora_info, custom_lora_button, gallery, selected_info, selected_index, prompt]
-     )
-
-     custom_lora_button.click(
-         discard_custom_adapter,
-         outputs=[custom_lora_info, custom_lora_button, gallery, selected_info, selected_index, custom_lora]
-     )
-
-     gr.on(
-         triggers=[generate_button.click, prompt.submit],
-         fn=process_adapter_generation,
-         inputs=[prompt, cfg_scale, steps, selected_index, randomize_seed, seed, aspect_ratio, lora_scale, speed_mode],
-         outputs=[result, seed]
-     )
-
- app.queue()
- app.launch(share=False, ssr_mode=False, show_error=True)
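That is the whole of the removed file. Its "Fast (8 steps)" path composed two adapters through diffusers' multi-LoRA support: the Lightning distillation LoRA at full strength plus the selected style LoRA at the slider's weight. A minimal sketch of that pattern, assuming `pipe` is the `Qwen/Qwen-Image` `DiffusionPipeline` built earlier in the removed file (the style repo, weight file, and trigger word are copied from the first gallery entry; the fixed scale of 1.0 stands in for the UI's LoRA Scale slider):

```python
# Sketch only: reproduces the removed Fast-mode adapter stack, assuming `pipe`
# was created with DiffusionPipeline.from_pretrained("Qwen/Qwen-Image") as above.
import torch

pipe.unload_lora_weights()  # clear any previously attached adapters first

pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-8steps-V1.0.safetensors",
    adapter_name="lightning",
)
pipe.load_lora_weights(
    "prithivMLmods/Qwen-Image-Studio-Realism",  # first gallery entry, as an example
    weight_name="qwen-studio-realism.safetensors",
    adapter_name="style",
)
# Lightning stays at 1.0; the style adapter takes the LoRA Scale slider value.
pipe.set_adapters(["lightning", "style"], adapter_weights=[1.0, 1.0])

# Fast mode pairs 8 steps with true_cfg_scale=1.0 (see adjust_generation_mode).
image = pipe(
    prompt="Studio Realism, a portrait photo",  # trigger word prepended, as in the app
    num_inference_steps=8,
    true_cfg_scale=1.0,
    width=1024,
    height=688,
    generator=torch.Generator(device="cuda").manual_seed(0),
).images[0]
```

Both adapter names persist on the pipeline between calls, which is why the handler unloads everything before each request rather than loading over an existing adapter name.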
 
  import os
+ import gradio_pdf
+ import hashlib
+ import re
  import time
+ import httpx
+ import oss2
+ import asyncio
+ import json
+ from typing import Dict, Any, Optional
+ from pathlib import Path
+ import click
  import gradio as gr
+ from io import BytesIO
+ from PIL import Image
+ from gradio_pdf import PDF
+ from loguru import logger
+ from datetime import datetime
+
+ # -- ADDED imports for model + PDF rendering --
+ import torch
+ from transformers import (
+     Qwen2VLForConditionalGeneration,
+     Qwen2_5_VLForConditionalGeneration,
+     AutoModelForCausalLM,
+     AutoModelForVision2Seq,
+     AutoProcessor,
+     TextIteratorStreamer,
+ )
+ from transformers.image_utils import load_image
+
+ # Optional PDF rendering dependency fallbacks
+ try:
+     import fitz  # PyMuPDF
+     _HAS_FITZ = True
+ except Exception:
+     _HAS_FITZ = False
+
+ try:
+     from pdf2image import convert_from_bytes
+     _HAS_PDF2IMAGE = True
+ except Exception:
+     _HAS_PDF2IMAGE = False
+
+ # --------- original constants and helpers ----------
+ pdf_suffixes = [".pdf"]
+ image_suffixes = [".png", ".jpeg", ".jpg"]
+
+ latex_delimiters_type_a = [
+     {'left': '$$', 'right': '$$', 'display': True},
+     {'left': '$', 'right': '$', 'display': False},
+ ]
+ latex_delimiters_type_b = [
+     {'left': '\\(', 'right': '\\)', 'display': False},
+     {'left': '\\[', 'right': '\\]', 'display': True},
  ]
+ latex_delimiters_type_all = latex_delimiters_type_a + latex_delimiters_type_b
+
+ header_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'parsing/resources', 'header.html')
+ with open(header_path, 'r') as header_file:
+     header = header_file.read()
+
+ oss_access_key = os.getenv('oss_access_key')
+ oss_secret_key = os.getenv('oss_secret_key')
+ oss_endpoint = os.getenv('oss_endpoint')
+ oss_bucket_name = os.getenv('oss_bucket_name')
+ APP_KEY = os.getenv('APP_KEY')
+
+ # Initialize the OSS client
+ auth = oss2.Auth(oss_access_key, oss_secret_key)
+ oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
+
+
+ def upload_file_to_oss(local_data_path, oss_path):
+     with open(local_data_path, "rb") as f:
+         oss_bucket.put_object(oss_path, f)
+     url = oss_bucket.sign_url('GET', oss_path, 31536000)
+     return url
+
+
+ def str_md5(input_string):
+     hasher = hashlib.md5()
+     # In Python 3, strings need to be converted to byte objects to be processed by the hash function
+     input_bytes = input_string.encode('utf-8')
+     hasher.update(input_bytes)
+     return hasher.hexdigest()
+

+ def images_bytes_to_pdf_bytes(image_bytes):
+     # Memory buffer
+     pdf_buffer = BytesIO()
+
+     # Load and convert all images to RGB mode
+     image = Image.open(BytesIO(image_bytes)).convert("RGB")
+
+     # Save the first image as a PDF and append the rest
+     image.save(pdf_buffer, format="PDF", save_all=True)
+
+     # Get PDF bytes and reset the pointer (optional)
+     pdf_bytes = pdf_buffer.getvalue()
+     pdf_buffer.close()
+     return pdf_bytes
+
+
+ def read_fn(path):
+     if not isinstance(path, Path):
+         path = Path(path)
+     with open(str(path), "rb") as input_file:
+         file_bytes = input_file.read()
+     if path.suffix in image_suffixes:
+         return images_bytes_to_pdf_bytes(file_bytes)
+     elif path.suffix in pdf_suffixes:
+         return file_bytes
      else:
+         raise Exception(f"Unknown file suffix: {path.suffix}")
+
+
+ def safe_stem(file_path):
+     stem = Path(file_path).stem
+     # Keep only letters, numbers, underscores, and dots, and replace other characters with underscores
+     return re.sub(r'[^\w.]', '_', stem)
+
+
+ def sanitize_filename(filename: str, max_prefix_len: int = 15) -> str:
+     """
+     Sanitize filename: remove illegal characters, truncate, and add a hash to prevent duplicates.
+     """
+     # 1. Extract the extension
+     name, ext = '', ''
+     if '.' in filename:
+         name = filename.rsplit('.', 1)[0]
+         ext = '.' + filename.rsplit('.', 1)[1].lower()
      else:
+         name = filename
+         ext = ''
+
+     # 2. Remove illegal characters (Windows/Linux compatible)
+     # Allowed: letters, numbers, -_.()
+     name = re.sub(r'[\\/:\*\?"<>\|\s]+', '_', name)  # Replace spaces and illegal characters with underscores
+     name = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', name)  # Remove control characters
+
+     # 3. Truncate and reserve space for the hash
+     prefix = name[:max_prefix_len]
+
+     # 4. Add an MD5 prefix hash to ensure uniqueness (based on the original path or content)
+     hash_suffix = hashlib.md5(filename.encode('utf-8')).hexdigest()[:6]
+
+     # 5. Combine
+     safe_name = f"{prefix}_{hash_suffix}{ext}"
+
+     # 6. Prevent starting or ending with a dot (sensitive in some systems)
+     while safe_name.startswith('.'):
+         safe_name = safe_name[1:]
+     if len(safe_name) == 0:
+         safe_name = f"file_{hash_suffix}.bin"
+
+     if len(safe_name.encode('utf-8')) > 250:
+         # Fallback to an absolutely safe name
+         unique_hash = hashlib.md5(filename.encode('utf-8')).hexdigest()[:8]
+         safe_name = f"doc_{unique_hash}.pdf"
+
+     return safe_name
+
+ def to_pdf(file_path):
+     if file_path is None:
+         return None
+
+     pdf_bytes = read_fn(file_path)
+
+     # unique_filename = f'{uuid.uuid4()}.pdf'
+     unique_filename = f'{safe_stem(file_path)}.pdf'
+
+     # Construct the full file path
+     tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
+
+     # Write the byte data to the file
+     with open(tmp_file_path, 'wb') as tmp_pdf_file:
+         tmp_pdf_file.write(pdf_bytes)
+
+     return tmp_file_path
+
+
+ def arg_parse(ctx: 'click.Context') -> dict:
+     # Parse extra arguments
+     extra_kwargs = {}
+     i = 0
+     while i < len(ctx.args):
+         arg = ctx.args[i]
+         if arg.startswith('--'):
+             param_name = arg[2:].replace('-', '_')  # Convert parameter name format
+             i += 1
+             if i < len(ctx.args) and not ctx.args[i].startswith('--'):
+                 # The parameter has a value
+                 try:
+                     # Try to convert to the appropriate type
+                     if ctx.args[i].lower() == 'true':
+                         extra_kwargs[param_name] = True
+                     elif ctx.args[i].lower() == 'false':
+                         extra_kwargs[param_name] = False
+                     elif '.' in ctx.args[i]:
+                         try:
+                             extra_kwargs[param_name] = float(ctx.args[i])
+                         except ValueError:
+                             extra_kwargs[param_name] = ctx.args[i]
+                     else:
+                         try:
+                             extra_kwargs[param_name] = int(ctx.args[i])
+                         except ValueError:
+                             extra_kwargs[param_name] = ctx.args[i]
+                 except:
+                     extra_kwargs[param_name] = ctx.args[i]
              else:
+                 # Boolean flag parameter
+                 extra_kwargs[param_name] = True
+                 i -= 1
+         i += 1
+     return extra_kwargs
+
+
+ # ----------------- NEW: local model integration -----------------
+ # Device detection
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ logger.info(f"Using device: {device}")
+
+ # Model ID - change if you want to use a different local model
+ MODEL_ID_M = "Logics-MLLM/Logics-Parsing"
+
+ # Load processor & model (may take time; expected)
+ try:
+     logger.info(f"Loading processor and model {MODEL_ID_M} ... (this can take a while)")
+     processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
+     model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+         MODEL_ID_M,
+         trust_remote_code=True,
+         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+     ).to(device).eval()
+     logger.info("Model loaded successfully.")
+ except Exception as e:
+     logger.error(f"Failed to load model {MODEL_ID_M}: {e}")
+     # Do not raise here — but subsequent calls will error informatively
+     processor_m = None
+     model_m = None
+
+
+ def pdf_bytes_to_images(pdf_bytes: bytes, max_pages: Optional[int] = 20):
+     """
+     Convert PDF bytes to a list of PIL Images (page thumbnails). Uses fitz if available or pdf2image as fallback.
+     Limits pages to max_pages for speed/memory reasons.
+     """
+     images = []
+     if _HAS_FITZ:
+         doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+         for i in range(min(len(doc), max_pages)):
+             page = doc[i]
+             # zoom matrix to increase resolution
+             mat = fitz.Matrix(2, 2)
+             pix = page.get_pixmap(matrix=mat, alpha=False)
+             img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+             images.append(img)
+         doc.close()
+         return images
+     elif _HAS_PDF2IMAGE:
+         pil_images = convert_from_bytes(pdf_bytes, fmt="png")
+         return pil_images[:max_pages]
      else:
+         raise RuntimeError("No PDF rendering backend available. Install PyMuPDF (fitz) or pdf2image + poppler.")
+
+
+ def build_parser_prompt(filename: str, num_pages: int) -> str:
+     """
+     Construct a prompt instructing the model to parse a document and return:
+     - mmd (markdown-like MMD)
+     - qwenHtml (HTML)
+     - mmdHtml (rendered HTML)
+     Return format should be clearly mark-delimited so we can extract parts.
+     """
+     prompt = f"""You are a document parsing assistant.
+ Input: a multi-page PDF document named "{filename}" with {num_pages} pages (images of pages are provided).
+ Task: Extract the document content in three outputs:
+ ---BEGIN_MMD---
+ Provide the document structure and content in MMD (Markdown-like) format. Keep code blocks, equations and tables preserved.
+ ---END_MMD---
+
+ ---BEGIN_MMD_HTML---
+ Provide an HTML rendering of the MMD (a full HTML fragment).
+ ---END_MMD_HTML---
+
+ ---BEGIN_QWEN_HTML---
+ Provide the Qwen-specific HTML (if applicable). If none, output a short HTML wrapper.
+ ---END_QWEN_HTML---
+
+ Only output the three sections exactly between the markers above. Do not output other commentary."""
+     return prompt
+
+
+ def run_model_on_pages(images: list, filename: str, max_new_tokens: int = 2048) -> str:
+     """
+     Run the vision-language model on the provided list of PIL images and return the generated text.
+     This is a blocking function — call it inside asyncio.to_thread when used from async context.
+     """
+     if processor_m is None or model_m is None:
+         raise RuntimeError("Processor or model not loaded. Check logs — model loading failed earlier.")
+
+     # Build a single prompt (the processor will accept images+text)
+     prompt = build_parser_prompt(filename, len(images))
+
+     # Prepare inputs for processor.
+     # Many VL processors accept a single text prompt + list of images.
+     # We pass the first N images (or all) and let the model generate.
      try:
+         inputs = processor_m(
+             text=[prompt],
+             images=images,
+             return_tensors="pt",
+             padding=True,
+             truncation=False,
+             max_length=4096  # conservative; may vary by model
+         )
      except Exception as e:
+         # Fallback: try tokenizing text only and provide images separately if needed
+         logger.warning(f"Processor call with images failed: {e}. Trying text-only processing.")
+         inputs = processor_m(text=[prompt], return_tensors="pt", padding=True).to(device)
+
+     # Move tensors to device if present
+     for k, v in list(inputs.items()):
+         try:
+             inputs[k] = v.to(device)
+         except Exception:
+             pass
+
+     gen_kwargs = {
+         **inputs,
+         "max_new_tokens": max_new_tokens,
+         "temperature": 0.2,
+         # you can add other generation params if needed
+     }
+
+     # Some models expect generate() to be called with different keys — keep try/except.
+     with torch.no_grad():
          try:
+             outputs = model_m.generate(**gen_kwargs)
+             # decode output
+             generated_text = processor_m.decode(outputs[0], skip_special_tokens=True)
          except Exception as e:
+             logger.warning(f"Direct model.generate failed: {e}. Trying streaming decoder approach.")
+             # fallback: run a smaller streaming loop or different interface
+             # Here we attempt to use the model's text generation with .generate from input_ids
+             if "input_ids" in inputs:
+                 try:
+                     outputs = model_m.generate(input_ids=inputs["input_ids"].to(device), max_new_tokens=max_new_tokens)
+                     generated_text = processor_m.decode(outputs[0], skip_special_tokens=True)
+                 except Exception as e2:
+                     logger.error(f"Fallback generation also failed: {e2}")
+                     raise
+             else:
+                 raise
+
+     # Basic cleanup
+     generated_text = generated_text.strip()
+     return generated_text
+
+
+ async def call_pdf_parse_async(file_name: str, pdf_url: str, app_key: str, user_id: str) -> Dict[str, Any]:
+     """
+     Replacement for the remote API. Downloads the PDF (signed URL), runs the local VL model to parse,
+     and returns a dict shaped like the remote API response: {'data': {...}}.
+     """
+     start_time = time.time()
+     # 1) download pdf bytes from the signed URL
+     try:
+         logger.info(f"Downloading PDF from url (signed) for local parsing: {pdf_url}")
+         async with httpx.AsyncClient() as client:
+             resp = await client.get(pdf_url, timeout=120)
+             if resp.status_code != 200:
+                 raise Exception(f"Failed to download pdf from {pdf_url}, status {resp.status_code}")
+             pdf_bytes = resp.content
+     except Exception as e:
+         logger.error(f"Failed to download pdf bytes: {e}")
+         raise
+
+     # 2) convert pdf bytes to images (pages)
+     try:
+         page_images = await asyncio.to_thread(pdf_bytes_to_images, pdf_bytes, 20)
+     except Exception as e:
+         logger.error(f"PDF to images conversion failed: {e}")
+         raise
+
+     # 3) run model on pages (blocking, put into thread)
+     parse_start = time.time()
+     try:
+         generated_text = await asyncio.to_thread(run_model_on_pages, page_images, file_name, 4096)
+     except Exception as e:
+         logger.error(f"Model generation failed: {e}")
+         raise
+     parse_end = time.time()
+
+     # 4) extract sections from generated_text using markers (best-effort)
+     def extract_section(full_text: str, start_marker: str, end_marker: str) -> str:
+         s = full_text.find(start_marker)
+         e = full_text.find(end_marker, s + len(start_marker)) if s != -1 else -1
+         if s != -1 and e != -1:
+             return full_text[s + len(start_marker):e].strip()
+         return ""
+
+     mmd = extract_section(generated_text, "---BEGIN_MMD---", "---END_MMD---")
+     mmd_html = extract_section(generated_text, "---BEGIN_MMD_HTML---", "---END_MMD_HTML---")
+     qwen_html = extract_section(generated_text, "---BEGIN_QWEN_HTML---", "---END_QWEN_HTML---")
+
+     # If extraction failed, fallback to using the whole generated_text in all fields (graceful)
+     if not any([mmd, mmd_html, qwen_html]):
+         mmd = generated_text
+         mmd_html = f"<pre>{generated_text}</pre>"
+         qwen_html = f"<pre>{generated_text}</pre>"
+
+     end_time = time.time()
+     waiting_cost = 0  # local parse, no queue
+     parsing_cost = parse_end - parse_start
+     total_cost = end_time - start_time
+
+     result = {
+         "data": {
+             "mmd": mmd,
+             "qwenHtml": qwen_html,
+             "downloadUrl": pdf_url,
+             "mmdHtml": mmd_html,
+             "waitingCostTime": waiting_cost,
+             "parsingCostTime": parsing_cost,
+             "totalCostTime": total_cost
+         }
+     }
+
+     logger.info(f"Local parsing finished: parsing_time={parsing_cost:.2f}s total_time={total_cost:.2f}s")
+     return result
+
+ # ----------------- end of model integration -----------------
+
+ # The rest of your code remains functionally identical. I kept it verbatim below.
+
+ async def pdf_parse(file_path, request: gr.Request):
+     headers = request.headers
+     print(f'headers: {headers}')
+     user_id = headers.get("X-Modelscope-Router-Id")
+     cookies = request.cookies
+     print(f'cookies: {cookies}')
+     cna = cookies.get('cna')
+     print(f'user_id: {user_id}, cna: {cna}')
+     ip = request.client.host
+     print(f'ip: {ip}')
+     if (user_id is None or user_id == '') and (cna is None or cna == ''):
+         user_id = "visitor"
+     if file_path is None:
+         logger.warning("file_path is None")
+         return (
+             "<p>Please upload a PDF file</p>",
+             "",
+             "<p>No input file</p>",
+             None,
+             None,
+             "Error: No file provided"
+         )
+     logger.info(f'file_path: {file_path}')
+     today = datetime.now().strftime("%Y-%m-%d")
+     file_name = Path(file_path).name
+     safe_file_name = sanitize_filename(file_name, 12)
+     print(f'safe_file_name: {safe_file_name}')
+     oss_path = f"model_scope/pdf_parse/input/{today}/{safe_file_name}"
+     url = upload_file_to_oss(file_path, oss_path)
+     logger.info(f'url: {url}')
+
+     # IMPORTANT: we now call the local model-based parser (no remote API)
+     result = await call_pdf_parse_async(safe_file_name, url, APP_KEY, user_id or cna)
+
+     if result is None:
+         logger.info(f'result is None')
+         return (
+             "<p>The parsing service is not responding. Please try again later.</p>",
+             "",
+             "<p>Service temporarily unavailable</p>",
+             None,
+             None,
+             "Error: The service did not return a response"
+         )
+     data = result.get('data', {})
+     mmd = data.get('mmd')
+     qwen_html = data.get('qwenHtml')
+     download_url = data.get('downloadUrl')
+     logger.info(f'download_url: {download_url}')
+     mmd_html = data.get('mmdHtml')
+     waiting_cost_time = data.get('waitingCostTime')
+     parsing_cost_time = data.get('parsingCostTime')
+     total_cost_time = data.get('totalCostTime')
+     # qwen_html = data.get('qwen_html')
+     # download_url = data.get('download_url')
+     # mmd_html = data.get('mmd_html')
+     cost_time = f'Queue waiting time: {waiting_cost_time}, Parsing time: {parsing_cost_time}, Total time: {total_cost_time}'
+
+     return mmd_html, mmd, qwen_html, download_url, url, cost_time
+
+
+ @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
+ @click.pass_context
+ @click.option(
+     '--latex-delimiters-type',
+     'latex_delimiters_type',
+     type=click.Choice(['a', 'b', 'all']),
+     help="Set the type of LaTeX delimiters to use in Markdown rendering: "
+          "'a' for type '$', 'b' for type '()[]', 'all' for both types.",
+     default='all',
+ )
+ def main(ctx, latex_delimiters_type, **kwargs):
+     kwargs.update(arg_parse(ctx))
+     if latex_delimiters_type == 'a':
+         latex_delimiters = latex_delimiters_type_a
+     elif latex_delimiters_type == 'b':
+         latex_delimiters = latex_delimiters_type_b
+     elif latex_delimiters_type == 'all':
+         latex_delimiters = latex_delimiters_type_all
      else:
+         raise ValueError(f"Invalid latex delimiters type: {latex_delimiters_type}.")
+
+     suffixes = pdf_suffixes + image_suffixes
+     with gr.Blocks(head='''
+     <meta name="data-spm" content="label" />
+     <meta name="aplus-core" content="aplus.js" />
+     <meta name="aplus-ifr-pv" content="1"/>
+     <meta name="aplus-iframe-ignore-i" content="on" />
+     <script>
+     window.APLUS_CONFIG = {
+         pid: 'aidata',
+     };
+     (function (w, d, s, q) {
+         w[q] = w[q] || [];
+         var f = d.getElementsByTagName(s)[0],
+             j = d.createElement(s);
+         j.async = true;
+         j.id = 'beacon-aplus';
+         var userIdParam = '';
+         j.setAttribute(
+             'exparams',
+             'userid=' +
+             userIdParam +
+             '&aplus&sidx=aplusSidex&ckx=aplusCkx'
+         );
+         j.src = '//g.alicdn.com/alilog/mlog/aplus_v2.js';
+         j.crossorigin = 'anonymous';
+         f.parentNode.insertBefore(j, f);
+     })(window, document, 'script', 'aplus_queue');
+     </script>
+     ''') as demo:
+         gr.HTML(header)
+         with gr.Row():
+             with gr.Column(variant='panel', scale=5):
                  with gr.Row():
+                     input_file = gr.File(label='Please upload a PDF or image (Max 20 pages for conversion)',
+                                          file_types=suffixes)
                  with gr.Row():
+                     change_bu = gr.Button('Convert')
+                     clear_bu = gr.ClearButton(value='Clear')
+                 pdf_show = PDF(label='PDF Preview', interactive=False, visible=True, height=800)
+
+                 example_root = os.path.join(os.getcwd(), 'parsing/examples')
+                 print(example_root)
+                 logger.info(f'example_root: {example_root}')
+                 if os.path.exists(example_root):
+                     with gr.Accordion('Examples:'):
+                         gr.Examples(
+                             examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
+                                       _.endswith(tuple(suffixes))],
+                             inputs=input_file
+                         )
+
+             with gr.Column(variant='panel', scale=5):
+                 output_file = gr.File(label='Conversion Result', interactive=False)
+                 cost_time = gr.Text(label='Time Cost')
+                 with gr.Tabs():
+                     with gr.Tab('MMD Rendering'):
+                         mmd_html = gr.HTML(label='MMD Rendering')
+                     # with gr.Tab('mmd html text'):
+                     #     mmd_html_text = gr.TextArea(lines=45, show_copy_button=True)
+                     with gr.Tab('MMD'):
+                         mmd = gr.TextArea(lines=45, show_copy_button=True)
+                     with gr.Tab('Qwen HTML'):
+                         raw_html = gr.TextArea(lines=45, show_copy_button=True)
+
+         clear_bu.add([input_file, pdf_show, mmd, raw_html, output_file, mmd_html, cost_time])
+         cna = gr.Textbox(visible=False)
+         input_file.change(fn=to_pdf, inputs=input_file, outputs=pdf_show)
+         change_bu.click(
+             fn=pdf_parse,
+             inputs=[input_file],
+             outputs=[mmd_html, mmd, raw_html, output_file, pdf_show, cost_time],
+             concurrency_limit=15
+         )
+
+     demo.launch()
+
+
+ if __name__ == '__main__':
+     main()
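The local parser added above round-trips through a marker-delimited contract: `build_parser_prompt` asks the model to wrap each output in `---BEGIN_X---`/`---END_X---` fences, and `call_pdf_parse_async` slices them back out. A self-contained sketch of that contract (the sample model output below is invented for illustration; the marker names and the slicing logic mirror the code above):

```python
# Toy round-trip of the ---BEGIN_X--- / ---END_X--- contract used by the parser.
sample_output = """---BEGIN_MMD---
# Title
Some extracted markdown.
---END_MMD---
---BEGIN_MMD_HTML---
<h1>Title</h1>
---END_MMD_HTML---
---BEGIN_QWEN_HTML---
<html><body><h1>Title</h1></body></html>
---END_QWEN_HTML---"""


def extract_section(full_text: str, start_marker: str, end_marker: str) -> str:
    # Best-effort slicing, as in call_pdf_parse_async: return "" when a marker
    # is missing so the caller can fall back to the raw generated text.
    s = full_text.find(start_marker)
    e = full_text.find(end_marker, s + len(start_marker)) if s != -1 else -1
    if s != -1 and e != -1:
        return full_text[s + len(start_marker):e].strip()
    return ""


assert extract_section(sample_output, "---BEGIN_MMD---", "---END_MMD---").startswith("# Title")
# A section the model failed to emit comes back empty.
assert extract_section(sample_output, "---BEGIN_XYZ---", "---END_XYZ---") == ""
```

When all three sections come back empty, the app wraps the raw generation in `<pre>` tags instead, so the UI always has something to render.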