SohomToom committed on
Commit
027067b
·
verified ·
1 Parent(s): 3ee45c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -56
app.py CHANGED
@@ -1,28 +1,13 @@
1
  import os
2
  import shutil
3
  import tempfile
 
 
 
4
  import cv2
5
  import numpy as np
6
- import gradio as gr
7
- import zipfile
8
- import patoolib
9
- import threading
10
  from paddleocr import PaddleOCR
11
  from PIL import Image
12
- import io
13
-
14
- def delayed_cleanup(path, delay=30):
15
- def cleanup():
16
- import time
17
- time.sleep(delay)
18
- try:
19
- if os.path.isdir(path):
20
- shutil.rmtree(path)
21
- elif os.path.exists(path):
22
- os.remove(path)
23
- except Exception as e:
24
- print(f"Cleanup failed: {e}")
25
- threading.Thread(target=cleanup).start()
26
 
27
  ocr = PaddleOCR(use_angle_cls=True, lang='en', det_model_dir='models/det', rec_model_dir='models/rec', cls_model_dir='models/cls')
28
 
@@ -75,7 +60,9 @@ def remove_text_dynamic_fill(img_path, output_path):
75
  image = cv2.imread(img_path)
76
  if image is None:
77
  return
78
- if len(image.shape) == 2 or image.shape[2] == 1:
 
 
79
  image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
80
  else:
81
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
@@ -88,57 +75,50 @@ def remove_text_dynamic_fill(img_path, output_path):
88
  x_min, y_min, x_max, y_max = bbox
89
  height = y_max - y_min
90
  padding = 2 if height <= 30 else 4 if height <= 60 else 6
 
91
  x_min_p = max(0, x_min - padding)
92
  y_min_p = max(0, y_min - padding)
93
  x_max_p = min(image.shape[1]-1, x_max + padding)
94
  y_max_p = min(image.shape[0]-1, y_max + padding)
 
95
  sample_crop = image[y_min_p:y_max_p, x_min_p:x_max_p]
96
  avg_color = np.mean(sample_crop.reshape(-1, 3), axis=0)
97
  fill_color = classify_background_color(avg_color)
98
  if fill_color is None:
99
  fill_color = sample_border_color(image, (x_min, y_min, x_max, y_max))
 
100
  cv2.rectangle(image, (x_min_p, y_min_p), (x_max_p, y_max_p), fill_color, -1)
101
 
102
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
103
  cv2.imwrite(output_path, image)
104
 
105
- def extract_comic_archive(archive_path, extract_to):
106
- if archive_path.endswith(".cbz"):
107
- with zipfile.ZipFile(archive_path, 'r') as zip_ref:
108
- zip_ref.extractall(extract_to)
109
- elif archive_path.endswith(".cbr"):
110
- patoolib.extract_archive(archive_path, outdir=extract_to)
111
-
112
- def process_cbz_cbr(input_files):
113
- temp_output = tempfile.mkdtemp()
114
-
115
- for file in input_files:
116
- filename = os.path.basename(file)
117
- output_path = os.path.join(temp_output, filename)
118
- remove_text_dynamic_fill(file, output_path)
119
 
120
- zip_bytes = io.BytesIO()
121
- with zipfile.ZipFile(zip_bytes, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
122
- for root, _, files in os.walk(temp_output):
 
 
 
 
 
 
 
 
123
  for file in files:
124
- file_path = os.path.join(root, file)
125
- arcname = os.path.relpath(file_path, temp_output)
126
- zf.write(file_path, arcname)
127
-
128
- zip_bytes.seek(0)
129
- zip_path = os.path.join(tempfile.gettempdir(), "cleaned_output.zip")
130
- with open(zip_path, "wb") as f:
131
- f.write(zip_bytes.read())
132
-
133
- return zip_path
134
-
135
-
136
- demo = gr.Interface(
137
- fn=process_cbz_cbr,
138
- inputs=gr.File(file_types=[".cbz", ".cbr"], label="Upload Comic Archive (.cbz or .cbr)"),
139
- outputs=gr.File(label="Download Cleaned Zip"),
140
- title="Comic Text Cleaner (.cbz/.cbr)",
141
- description="Upload a .cbz or .cbr file and get a zip of cleaned comic images (text removed using PaddleOCR)."
142
- )
143
 
144
- demo.launch()
 
1
  import os
2
  import shutil
3
  import tempfile
4
+ import zipfile
5
+ import rarfile
6
+ import gradio as gr
7
  import cv2
8
  import numpy as np
 
 
 
 
9
  from paddleocr import PaddleOCR
10
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  ocr = PaddleOCR(use_angle_cls=True, lang='en', det_model_dir='models/det', rec_model_dir='models/rec', cls_model_dir='models/cls')
13
 
 
60
  image = cv2.imread(img_path)
61
  if image is None:
62
  return
63
+ if len(image.shape) == 2:
64
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
65
+ elif image.shape[2] == 1:
66
  image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
67
  else:
68
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
75
  x_min, y_min, x_max, y_max = bbox
76
  height = y_max - y_min
77
  padding = 2 if height <= 30 else 4 if height <= 60 else 6
78
+
79
  x_min_p = max(0, x_min - padding)
80
  y_min_p = max(0, y_min - padding)
81
  x_max_p = min(image.shape[1]-1, x_max + padding)
82
  y_max_p = min(image.shape[0]-1, y_max + padding)
83
+
84
  sample_crop = image[y_min_p:y_max_p, x_min_p:x_max_p]
85
  avg_color = np.mean(sample_crop.reshape(-1, 3), axis=0)
86
  fill_color = classify_background_color(avg_color)
87
  if fill_color is None:
88
  fill_color = sample_border_color(image, (x_min, y_min, x_max, y_max))
89
+
90
  cv2.rectangle(image, (x_min_p, y_min_p), (x_max_p, y_max_p), fill_color, -1)
91
 
92
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
93
  cv2.imwrite(output_path, image)
94
 
95
def _unique_output_path(directory, filename, taken):
    """Return a path inside *directory* for *filename* that was not handed out before.

    *taken* is a set of lower-cased names already used; it is updated in place.
    When *filename* is already taken, ``_1``, ``_2``, ... is appended to the stem
    so that pages with the same name (from different archives or sub-folders)
    do not overwrite each other.
    """
    stem, ext = os.path.splitext(filename)
    candidate = filename
    counter = 1
    # Compare case-insensitively so names cannot clash on case-insensitive filesystems.
    while candidate.lower() in taken:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    taken.add(candidate.lower())
    return os.path.join(directory, candidate)


def process_cbz_cbr(files):
    """Clean every image found in the given comic archives and zip the results.

    Args:
        files: Path(s) to ``.cbz`` / ``.cbr`` archives. May be an iterable of
            paths, a single path, or ``None`` (treated as "no files").
            Entries with any other extension are skipped.

    Returns:
        Path to a newly created ``.zip`` file containing the images written by
        ``remove_text_dynamic_fill``. The caller owns this file and is
        responsible for deleting it.
    """
    if files is None:
        files = []
    elif isinstance(files, (str, os.PathLike)):
        # A bare path would otherwise be iterated character by character.
        files = [files]

    final_output = tempfile.mkdtemp()
    taken_names = set()
    try:
        for file_path in files:
            if isinstance(file_path, os.PathLike):
                file_path = os.fspath(file_path)

            # Match the extension case-insensitively (".CBZ" is as valid as ".cbz").
            lowered = file_path.lower()
            if lowered.endswith(".cbz"):
                open_archive = zipfile.ZipFile
            elif lowered.endswith(".cbr"):
                open_archive = rarfile.RarFile
            else:
                continue

            # The extraction directory is only needed while this archive is
            # being processed; the context manager removes it afterwards.
            with tempfile.TemporaryDirectory() as extract_dir:
                with open_archive(file_path) as archive:
                    archive.extractall(extract_dir)

                for root, dirs, names in os.walk(extract_dir):
                    dirs.sort()  # deterministic traversal order
                    for name in sorted(names):
                        if name.lower().endswith(('.jpg', '.jpeg', '.png')):
                            input_path = os.path.join(root, name)
                            output_path = _unique_output_path(
                                final_output, name, taken_names
                            )
                            remove_text_dynamic_fill(input_path, output_path)

        # Create output zip. Only reserve the name here and close the handle
        # right away so the file can be reopened by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as handle:
            zip_path = handle.name

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            # final_output is flat: every cleaned image is written directly into it.
            for name in sorted(os.listdir(final_output)):
                zf.write(os.path.join(final_output, name), arcname=name)
    finally:
        # The staged images are now inside the zip (or processing failed).
        shutil.rmtree(final_output, ignore_errors=True)

    return zip_path