bartwisch committed on
Commit
3675e5e
·
1 Parent(s): 0efdeb2

feat: add Gradio app for ZeroGPU

Browse files
Files changed (3) hide show
  1. .github/.gitkeep +0 -0
  2. README.md +4 -4
  3. gradio_app.py +255 -0
.github/.gitkeep ADDED
File without changes
README.md CHANGED
@@ -3,9 +3,9 @@ title: Manga Translator
3
  emoji: 📚
4
  colorFrom: pink
5
  colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.40.0
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
@@ -46,7 +46,7 @@ Best option – provides **16 GB RAM** for free, which is needed for the OCR mod
46
 
47
  1. Go to **[huggingface.co/spaces](https://huggingface.co/spaces)**
48
  2. Click **"Create new Space"**
49
- 3. Select **Streamlit** as SDK
50
  4. Clone this repo or link your GitHub repo
51
  5. The app will auto-deploy using the YAML header in this README
52
 
 
3
  emoji: 📚
4
  colorFrom: pink
5
  colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.28.3
8
+ app_file: gradio_app.py
9
  pinned: false
10
  license: mit
11
  ---
 
46
 
47
  1. Go to **[huggingface.co/spaces](https://huggingface.co/spaces)**
48
  2. Click **"Create new Space"**
49
+ 3. Select **Gradio** as SDK
50
  4. Clone this repo or link your GitHub repo
51
  5. The app will auto-deploy using the YAML header in this README
52
 
gradio_app.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from typing import List, Optional
4
+
5
+ import certifi
6
+ import gradio as gr
7
+ from PIL import Image
8
+
9
+ from src.pdf_handler import PDFHandler
10
+ from src.ocr_handler import OCRHandler
11
+ from src.translator import TranslatorService
12
+ from src.image_processor import ImageProcessor
13
+
14
# Point HTTPS clients at certifi's CA bundle; works around SSL issues when
# calling the remote translation APIs (DeepL / OpenAI / xAI).
os.environ.update(SSL_CERT_FILE=certifi.where())

# Module-level helpers, created once at import time and reused by every request.
pdf_handler = PDFHandler()
image_processor = ImageProcessor()
19
+
20
+
21
def parse_page_range(range_str: str) -> List[int]:
    """Parse a 1-based page selection string into sorted 0-based page indices.

    The input is a comma-separated list of single pages and inclusive
    ``start-end`` ranges, e.g. ``"1-3, 5, 7-9"``. Reversed ranges such as
    ``"5-2"`` are normalised, duplicates are collapsed, tokens that are not
    valid integers are skipped silently, and page numbers below 1 are dropped.

    Args:
        range_str: User-supplied selection; may be empty or whitespace only.

    Returns:
        Sorted, de-duplicated 0-based page indices, or an empty list when the
        input contains no valid page number.
    """
    if not range_str or not range_str.strip():
        return []

    # Collect straight into a set: de-duplicates on the fly and avoids
    # materialising every range as an intermediate list.
    selected = set()
    for token in range_str.split(","):
        token = token.strip()
        if not token:
            continue
        first, dash, last = token.partition("-")
        try:
            if dash:
                low, high = sorted((int(first), int(last)))
                # Pages below 1 are never valid, so do not enumerate them.
                selected.update(range(max(low, 1), high + 1))
            else:
                selected.add(int(token))
        except ValueError:
            # Malformed token (e.g. "abc", "3-", "-4"): ignore it and go on.
            continue

    # Convert the remaining 1-based page numbers to 0-based indices.
    return [page - 1 for page in sorted(selected) if page > 0]
47
+
48
+
49
+ def _build_translator(service_label: str, deepl_key: str, openai_key: str, xai_key: str) -> TranslatorService:
50
+ label_map = {
51
+ "Google Translate": "google",
52
+ "DeepL": "deepl",
53
+ "OpenAI GPT-4o-mini": "openai",
54
+ "xAI Grok": "xai",
55
+ }
56
+ service_type = label_map.get(service_label, "google")
57
+
58
+ api_key: Optional[str] = None
59
+ if service_type == "deepl":
60
+ api_key = deepl_key or None
61
+ elif service_type == "openai":
62
+ api_key = openai_key or None
63
+ elif service_type == "xai":
64
+ api_key = xai_key or None
65
+
66
+ if service_type in ["deepl", "openai", "xai"] and not api_key:
67
+ raise ValueError(f"Missing API key for {service_type} service.")
68
+
69
+ return TranslatorService(source="en", target="de", service_type=service_type, api_key=api_key)
70
+
71
+
72
def translate_manga(
    pdf_path: str,
    page_range: str,
    ocr_engine: str,
    bubble_threshold: float,
    preprocess_mode: str,
    translator_label: str,
    deepl_key: str,
    openai_key: str,
    xai_key: str,
    show_boxes: bool,
    use_vision: bool,
    progress: gr.Progress = gr.Progress(track_tqdm=True),
):
    """Translate the selected pages of a PDF and write the result as a new PDF.

    Args:
        pdf_path: Path of the uploaded PDF; falsy when nothing was uploaded.
        page_range: 1-based page selection such as ``"1-3, 5"``; when it
            yields no pages, ``pages=None`` is passed to the PDF handler.
        ocr_engine: OCR engine name forwarded to ``OCRHandler``.
        bubble_threshold: Passed as ``distance_threshold`` to the OCR grouping.
        preprocess_mode: OCR preprocessing mode forwarded to the OCR handler.
        translator_label: UI label of the translation service.
        deepl_key: API key for DeepL.
        openai_key: API key for OpenAI.
        xai_key: API key for xAI.
        show_boxes: When true, only draw the detected boxes instead of
            overlaying the translated text.
        use_vision: When true, skip OCR and let the translator process the
            page image directly via ``translate_image_with_vision``.
        progress: Progress tracker. Declared as a default-valued parameter so
            Gradio injects the tracker for the running event; callers never
            pass it explicitly.

    Returns:
        A ``(output_pdf_path, processed_images, status_message)`` tuple. On
        failure the path is ``None`` and the image list is empty.
    """
    if not pdf_path:
        return None, [], "Bitte eine PDF-Datei hochladen."

    # Best effort: an unusable page selection falls back to "all pages" (None).
    try:
        pages = parse_page_range(page_range) or None
    except Exception:
        pages = None

    # Report configuration problems (e.g. a missing API key) through the
    # status box, like the other early exits, instead of an unhandled error.
    try:
        translator = _build_translator(translator_label, deepl_key, openai_key, xai_key)
    except ValueError as exc:
        return None, [], str(exc)

    ocr_handler: Optional[OCRHandler] = None
    if not use_vision:
        # On ZeroGPU we can safely enable GPU acceleration
        # NOTE(review): ZeroGPU Spaces normally grant GPU access only inside
        # functions decorated with `spaces.GPU`; this function is not
        # decorated - confirm that GPU mode is actually available here.
        ocr_handler = OCRHandler(lang_list=["en"], gpu=True, ocr_engine=ocr_engine)

    progress(0.0, desc="PDF wird geladen und in Bilder umgewandelt...")
    images: List[Image.Image] = pdf_handler.extract_images_from_pdf(pdf_path, zoom=1.5, pages=pages)
    total = len(images)
    if total == 0:
        return None, [], "Keine Seiten im PDF gefunden."

    processed_images: List[Image.Image] = []

    for idx, img in enumerate(images):
        progress(idx / total, desc=f"Verarbeite Seite {idx + 1} von {total}...")

        # Each entry is a (bbox, original_text, translated_text) tuple.
        text_regions = []

        if use_vision:
            for item in translator.translate_image_with_vision(img):
                text_regions.append(
                    (item["bbox"], item.get("original", ""), item.get("translated", ""))
                )
        else:
            ocr_results = ocr_handler.detect_and_group_text(
                img,
                distance_threshold=bubble_threshold,
                preprocess_mode=preprocess_mode,
            )
            for bbox, text in ocr_results:
                # Skip regions with fewer than two non-whitespace-trimmed characters.
                if len(text.strip()) < 2:
                    continue
                text_regions.append((bbox, text, translator.translate_text(text)))

        # Work on a copy so the extracted page image stays untouched.
        if show_boxes:
            processed = image_processor.draw_boxes_only(img.copy(), text_regions)
        else:
            processed = image_processor.overlay_text(img.copy(), text_regions)

        processed_images.append(processed)

    # The directory is intentionally not removed here: the returned path must
    # still exist after this function returns so the file can be served.
    tmp_dir = tempfile.mkdtemp(prefix="mangatranslator_")
    output_pdf_path = os.path.join(tmp_dir, "translated_manga.pdf")

    pdf_handler.save_images_as_pdf(processed_images, output_pdf_path)

    progress(1.0, desc="Fertig!")

    return output_pdf_path, processed_images, "Fertig! Du kannst das übersetzte PDF herunterladen."
152
+
153
+
154
# Gradio UI: two settings columns, followed by the run button and the outputs.
with gr.Blocks() as demo:
    gr.Markdown("# 📚 Manga Translator (Gradio + ZeroGPU)")
    gr.Markdown(
        "Lädt ein Manga-PDF, erkennt Sprechblasen per OCR und legt die deutsche Übersetzung in die Sprechblasen."
    )

    with gr.Row():
        # Left column: input document and OCR settings.
        with gr.Column():
            pdf_input = gr.File(label="Manga PDF hochladen", file_types=[".pdf"], type="filepath")
            page_range = gr.Textbox(
                label="Seitenbereich (optional)",
                placeholder="z.B. 1-5, 7, 10-12 (leer = alle Seiten)",
            )

            ocr_engine = gr.Radio(
                label="OCR-Engine",
                choices=["magi", "manga-ocr", "paddleocr", "easyocr"],
                value="magi",
            )
            preprocess_mode = gr.Radio(
                label="OCR Preprocessing",
                choices=["gentle", "none", "aggressive"],
                value="gentle",
            )
            bubble_threshold = gr.Slider(
                label="Bubble-Gruppierung (Pixel)", minimum=20, maximum=200, value=80, step=5
            )
            show_boxes = gr.Checkbox(label="Nur Boxen zeichnen (Debug)", value=False)

        # Right column: translation service and its credentials.
        with gr.Column():
            translator_label = gr.Radio(
                label="Übersetzungsdienst",
                choices=["Google Translate", "DeepL", "OpenAI GPT-4o-mini", "xAI Grok"],
                value="Google Translate",
            )
            use_vision = gr.Checkbox(label="Vision-Modus (OpenAI/xAI Vision)", value=False)
            deepl_key = gr.Textbox(label="DeepL API Key", type="password", visible=True)
            openai_key = gr.Textbox(label="OpenAI API Key", type="password", visible=True)
            xai_key = gr.Textbox(label="xAI API Key", type="password", visible=True)

    run_btn = gr.Button("🚀 Übersetzen")

    output_pdf = gr.File(label="Übersetztes PDF")
    preview_gallery = gr.Gallery(
        label="Vorschau der verarbeiteten Seiten",
        columns=3,
        height="auto",
    )
    status_box = gr.Textbox(label="Status", interactive=False)

    # Input order must match the positional parameters of translate_manga.
    run_btn.click(
        fn=translate_manga,
        inputs=[
            pdf_input, page_range, ocr_engine, bubble_threshold, preprocess_mode,
            translator_label, deepl_key, openai_key, xai_key, show_boxes, use_vision,
        ],
        outputs=[output_pdf, preview_gallery, status_box],
    )


# Start the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()