haaaaus commited on
Commit
ef0641f
·
verified ·
1 Parent(s): fc778e6

Upload 83 files

Browse files
Files changed (42) hide show
  1. .gitattributes +13 -0
  2. README.md +61 -16
  3. __pycache__/app.cpython-311.pyc +0 -0
  4. __pycache__/font_analyzer.cpython-311.pyc +0 -0
  5. __pycache__/process_bubble.cpython-311.pyc +0 -0
  6. app.py +491 -36
  7. font_analyzer.py +151 -0
  8. fonts/Yuki-Arenzi.ttf +0 -0
  9. fonts/Yuki-Burobu.ttf +3 -0
  10. fonts/Yuki-CCMarianChurchlandJournal.ttf +3 -0
  11. fonts/Yuki-CDX Starstreak.ttf +3 -0
  12. fonts/Yuki-CHICKEN Pie.ttf +3 -0
  13. fonts/Yuki-CrashLanding BB.ttf +0 -0
  14. fonts/Yuki-Downhill Dive.ttf +3 -0
  15. fonts/Yuki-Gingerline DEMO Regular.ttf +0 -0
  16. fonts/Yuki-Gorrilaz_Story.ttf +3 -0
  17. fonts/Yuki-KG Only Angel.ttf +3 -0
  18. fonts/Yuki-LF SwandsHand.ttf +0 -0
  19. fonts/Yuki-La Belle Aurore.ttf +0 -0
  20. fonts/Yuki-Little Cupcakes.ttf +3 -0
  21. fonts/Yuki-Nagurigaki Crayon.ttf +3 -0
  22. fonts/Yuki-Ripsnort BB.ttf +3 -0
  23. fonts/Yuki-Roasthink.ttf +0 -0
  24. fonts/Yuki-Screwball.ttf +0 -0
  25. fonts/Yuki-Shark Crash.ttf +3 -0
  26. fonts/Yuki-Skulduggery.ttf +3 -0
  27. fonts/Yuki-Superscratchy.ttf +0 -0
  28. fonts/Yuki-Tea And Oranges Regular.ttf +3 -0
  29. ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc +0 -0
  30. ocr/chrome_lens_ocr.py +58 -0
  31. process_bubble.py +12 -1
  32. static/css/style.css +50 -0
  33. static/js/app.js +55 -0
  34. templates/index.html +148 -0
  35. templates/translate.html +20 -7
  36. translator/__pycache__/__init__.cpython-311.pyc +0 -0
  37. translator/__pycache__/copilot_translator.cpython-311.pyc +0 -0
  38. translator/__pycache__/gemini_translator.cpython-311.pyc +0 -0
  39. translator/__pycache__/translator.cpython-311.pyc +0 -0
  40. translator/copilot_translator.py +351 -0
  41. translator/gemini_translator.py +136 -50
  42. translator/translator.py +3 -1
.gitattributes CHANGED
@@ -47,3 +47,16 @@ examples/ex3.png filter=lfs diff=lfs merge=lfs -text
47
  fonts/ariali.ttf filter=lfs diff=lfs merge=lfs -text
48
  static/img/loading.gif filter=lfs diff=lfs merge=lfs -text
49
  static/img/back.jpg filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  fonts/ariali.ttf filter=lfs diff=lfs merge=lfs -text
48
  static/img/loading.gif filter=lfs diff=lfs merge=lfs -text
49
  static/img/back.jpg filter=lfs diff=lfs merge=lfs -text
50
+ fonts/Yuki-Burobu.ttf filter=lfs diff=lfs merge=lfs -text
51
+ fonts/Yuki-CCMarianChurchlandJournal.ttf filter=lfs diff=lfs merge=lfs -text
52
+ fonts/Yuki-CDX[[:space:]]Starstreak.ttf filter=lfs diff=lfs merge=lfs -text
53
+ fonts/Yuki-CHICKEN[[:space:]]Pie.ttf filter=lfs diff=lfs merge=lfs -text
54
+ fonts/Yuki-Downhill[[:space:]]Dive.ttf filter=lfs diff=lfs merge=lfs -text
55
+ fonts/Yuki-Gorrilaz_Story.ttf filter=lfs diff=lfs merge=lfs -text
56
+ fonts/Yuki-KG[[:space:]]Only[[:space:]]Angel.ttf filter=lfs diff=lfs merge=lfs -text
57
+ fonts/Yuki-Little[[:space:]]Cupcakes.ttf filter=lfs diff=lfs merge=lfs -text
58
+ fonts/Yuki-Nagurigaki[[:space:]]Crayon.ttf filter=lfs diff=lfs merge=lfs -text
59
+ fonts/Yuki-Ripsnort[[:space:]]BB.ttf filter=lfs diff=lfs merge=lfs -text
60
+ fonts/Yuki-Shark[[:space:]]Crash.ttf filter=lfs diff=lfs merge=lfs -text
61
+ fonts/Yuki-Skulduggery.ttf filter=lfs diff=lfs merge=lfs -text
62
+ fonts/Yuki-Tea[[:space:]]And[[:space:]]Oranges[[:space:]]Regular.ttf filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -7,24 +7,69 @@ sdk: docker
7
  pinned: false
8
  license: mit
9
  ---
 
10
 
11
- # Manga Translator
12
 
13
- Translate manga/webtoon speech bubbles automatically!
14
 
15
- ## Features
16
- - 🔍 YOLO-based bubble detection
17
- - 📝 Multiple OCR engines (Manga-OCR, Chrome Lens)
18
- - 🌐 Multiple translators (Google, Gemini, Bing, Baidu, NLLB)
19
- - 📏 Smart handling for long webtoon images
20
- - 🎨 Custom fonts support
21
 
22
- ## Usage
23
- 1. Upload manga/webtoon images
24
- 2. Select source and target languages
25
- 3. Choose translator and OCR engine
26
- 4. Click Translate!
27
 
28
- ## Supported Languages
29
- - Source: Japanese, Chinese, Korean, English
30
- - Target: Vietnamese, English, Chinese, Korean, Thai, Indonesian, French, German, Spanish, Russian
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  pinned: false
8
  license: mit
9
  ---
10
+ # Manga Translator 📚
11
 
12
+ Dịch tự động speech bubbles trong manga/manhwa/manhua!
13
 
14
+ ## Features
15
 
16
+ ### Core
17
+ - 🔍 **YOLO-based bubble detection** - Phát hiện speech bubble tự động
18
+ - 📝 **Multiple OCR engines** - Manga-OCR, Chrome Lens (batch support)
19
+ - 🌐 **Multiple translators** - Gemini, Copilot API, NLLB, Opus-MT
 
 
20
 
21
+ ### Translation
22
+ - 🧠 **Context Memory** - Sử dụng context từ tất cả ảnh để dịch chính xác hơn
23
+ - 🎯 **Multi-page batch translation** - Dịch 10 pages/API call tiết kiệm quota
24
+ - 🎨 **Translation styles** - Default, Casual, Formal, Keep Honorifics, Web Novel...
 
25
 
26
+ ### UI/UX
27
+ - 📊 **Real-time progress** - Progress bar hiển thị tiến độ theo từng phase
28
+ - 📦 **Download ZIP** - Tải tất cả ảnh đã dịch dưới dạng ZIP
29
+ - 🔤 **Auto font sizing** - Tự động điều chỉnh cỡ chữ theo bubble
30
+ - 📏 **24+ fonts** - Yuki fonts, AnimeAce, và nhiều font khác
31
+
32
+ ## 🚀 Usage
33
+
34
+ ```bash
35
+ # Install dependencies
36
+ pip install -r requirements.txt
37
+
38
+ # Run
39
+ python app.py
40
+ ```
41
+
42
+ Mở http://localhost:5000
43
+
44
+ ## 📋 Workflow
45
+
46
+ 1. Upload manga/manhwa images
47
+ 2. Chọn ngôn ngữ gốc (Japanese/Chinese/Korean/English)
48
+ 3. Chọn ngôn ngữ đích (Vietnamese, English, ...)
49
+ 4. Chọn translator (Gemini/Copilot) và OCR engine
50
+ 5. Check "Context Memory" để dịch chính xác hơn
51
+ 6. Click **Translate**!
52
+ 7. Xem progress bar real-time
53
+ 8. Download từng ảnh hoặc **Download ZIP**
54
+
55
+ ## 🌍 Supported Languages
56
+
57
+ | Source | Target |
58
+ |--------|--------|
59
+ | Japanese (Manga) | Vietnamese |
60
+ | Chinese (Manhua) | English |
61
+ | Korean (Manhwa) | Chinese |
62
+ | English (Comic) | Korean, Thai, Indonesian, French, German, Spanish, Russian |
63
+
64
+ ## 📡 API Keys
65
+
66
+ - **Gemini**: Nhập API key từ [ai.google.dev](https://ai.google.dev)
67
+ - **Copilot**: Chạy server [copilot-api](https://github.com/copilot-api) local
68
+
69
+ ## 🔧 Tech Stack
70
+
71
+ - Flask + Flask-SocketIO (real-time WebSocket)
72
+ - YOLOv8 (bubble detection)
73
+ - Manga-OCR / Chrome-Lens (OCR)
74
+ - Gemini / Copilot API (translation)
75
+ - PIL (text rendering)
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
__pycache__/font_analyzer.cpython-311.pyc ADDED
Binary file (9.07 kB). View file
 
__pycache__/process_bubble.cpython-311.pyc CHANGED
Binary files a/__pycache__/process_bubble.cpython-311.pyc and b/__pycache__/process_bubble.cpython-311.pyc differ
 
app.py CHANGED
@@ -1,4 +1,8 @@
1
- from flask import Flask, render_template, request, redirect
 
 
 
 
2
  from detect_bubbles import detect_bubbles
3
  from process_bubble import process_bubble
4
  from translator.translator import MangaTranslator
@@ -14,7 +18,10 @@ import os
14
 
15
  app = Flask(__name__)
16
  app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", "secret_key")
17
- app.config["MAX_CONTENT_LENGTH"] = 50 * 1024 * 1024 # 50MB max upload
 
 
 
18
 
19
  MODEL_PATH = "model/model.pt"
20
 
@@ -24,10 +31,11 @@ def home():
24
  return render_template("index.html")
25
 
26
 
27
- def process_single_image(image, manga_translator, mocr, selected_translator, selected_font):
28
  """Process a single image and return the translated version.
29
 
30
  Optimized with batch translation for Gemini to reduce API calls.
 
31
  """
32
  results = detect_bubbles(MODEL_PATH, image)
33
 
@@ -37,11 +45,16 @@ def process_single_image(image, manga_translator, mocr, selected_translator, sel
37
  # Phase 1: Collect all bubble data and OCR texts
38
  bubble_data = []
39
  texts_to_translate = []
 
40
 
41
  for result in results:
42
  x1, y1, x2, y2, score, class_id = result
43
  detected_image = image[int(y1):int(y2), int(x1):int(x2)]
44
 
 
 
 
 
45
  # Fix: detected_image is already uint8, no need to multiply by 255
46
  im = Image.fromarray(detected_image)
47
  text = mocr(im)
@@ -55,13 +68,19 @@ def process_single_image(image, manga_translator, mocr, selected_translator, sel
55
  })
56
  texts_to_translate.append(text)
57
 
58
- # Phase 2: Batch translate (especially efficient for Gemini)
 
 
 
 
59
  if selected_translator == "gemini" and len(texts_to_translate) > 1:
60
  # Use batch translation for Gemini
61
  try:
62
  if manga_translator._gemini_translator is None:
63
  from translator.gemini_translator import GeminiTranslator
64
- api_key = manga_translator.gemini_api_key or "AIzaSyAplFKOKBEcQku5m6gPEBMlZMGc4sI5rgo"
 
 
65
  custom_prompt = getattr(manga_translator, '_gemini_custom_prompt', None)
66
  manga_translator._gemini_translator = GeminiTranslator(
67
  api_key=api_key,
@@ -76,35 +95,309 @@ def process_single_image(image, manga_translator, mocr, selected_translator, sel
76
  except Exception as e:
77
  print(f"Batch translation failed, falling back to single: {e}")
78
  translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  else:
80
  # Single translation for other translators
81
  translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
82
 
83
  # Phase 3: Add translated text to bubbles
84
- font_path = f"fonts/{selected_font}i.ttf"
 
85
  for data, translated_text in zip(bubble_data, translated_texts):
86
  add_text(data['detected_image'], translated_text, font_path, data['contour'])
87
 
88
  return image
89
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  @app.route("/translate", methods=["POST"])
92
  def upload_file():
93
  # Get translator selection
94
  translator_map = {
95
  "Opus-mt model": "hf",
96
  "NLLB": "nllb",
97
- "Gemini": "gemini"
 
98
  }
99
  selected_translator = translator_map.get(
100
  request.form["selected_translator"],
101
  request.form["selected_translator"].lower()
102
  )
 
 
 
 
 
 
 
 
 
 
103
 
104
  # Get font selection
105
- selected_font = request.form["selected_font"].lower()
106
- if selected_font == "animeace":
107
- selected_font += "_"
 
 
 
 
 
 
 
 
108
 
109
  # Get OCR engine
110
  selected_ocr = request.form.get("selected_ocr", "chrome-lens").lower()
@@ -167,46 +460,170 @@ def upload_file():
167
  if selected_translator == "gemini" and style:
168
  manga_translator._gemini_custom_prompt = style
169
 
 
 
 
 
 
 
 
 
 
 
 
170
  if selected_ocr == "chrome-lens":
171
  mocr = ChromeLensOCR()
172
  else:
173
  mocr = MangaOcr()
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  # Process all images
176
  processed_images = []
 
177
 
178
- for file in files:
179
- if file and file.filename:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  try:
181
- # Read image
182
- file_stream = file.stream
183
- file_bytes = np.frombuffer(file_stream.read(), dtype=np.uint8)
184
- image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
185
-
186
- if image is None:
187
- continue
188
-
189
- # Get original filename
190
- name = os.path.splitext(file.filename)[0]
191
-
192
- # Process image
193
- processed_image = process_single_image(
194
- image, manga_translator, mocr,
195
- selected_translator, selected_font
 
 
 
 
196
  )
197
-
198
- # Encode to base64 (JPEG is 5-10x faster than PNG)
199
- _, buffer = cv2.imencode(".jpg", processed_image, [cv2.IMWRITE_JPEG_QUALITY, 95])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  encoded_image = base64.b64encode(buffer.tobytes()).decode("utf-8")
201
-
202
  processed_images.append({
203
- "name": name,
204
  "data": encoded_image
205
  })
206
-
207
  except Exception as e:
208
- print(f"Error processing {file.filename}: {e}")
209
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  if not processed_images:
212
  return redirect("/")
@@ -214,5 +631,43 @@ def upload_file():
214
  return render_template("translate.html", images=processed_images)
215
 
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  if __name__ == "__main__":
218
- app.run(debug=True)
 
1
+ from flask import Flask, render_template, request, redirect, send_file, jsonify
2
+ from flask_socketio import SocketIO, emit
3
+ import io
4
+ import zipfile
5
+ import json
6
  from detect_bubbles import detect_bubbles
7
  from process_bubble import process_bubble
8
  from translator.translator import MangaTranslator
 
18
 
19
  app = Flask(__name__)
20
  app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", "secret_key")
21
+ # No upload size limit (removed MAX_CONTENT_LENGTH restriction)
22
+
23
+ # Initialize SocketIO for real-time progress updates
24
+ socketio = SocketIO(app, cors_allowed_origins="*", async_mode='threading')
25
 
26
  MODEL_PATH = "model/model.pt"
27
 
 
31
  return render_template("index.html")
32
 
33
 
34
+ def process_single_image(image, manga_translator, mocr, selected_translator, selected_font, font_analyzer=None):
35
  """Process a single image and return the translated version.
36
 
37
  Optimized with batch translation for Gemini to reduce API calls.
38
+ Supports auto font matching when font_analyzer is provided and selected_font is 'auto'.
39
  """
40
  results = detect_bubbles(MODEL_PATH, image)
41
 
 
45
  # Phase 1: Collect all bubble data and OCR texts
46
  bubble_data = []
47
  texts_to_translate = []
48
+ first_bubble_image = None # For font analysis
49
 
50
  for result in results:
51
  x1, y1, x2, y2, score, class_id = result
52
  detected_image = image[int(y1):int(y2), int(x1):int(x2)]
53
 
54
+ # Save first bubble for font analysis (before processing)
55
+ if first_bubble_image is None:
56
+ first_bubble_image = detected_image.copy()
57
+
58
  # Fix: detected_image is already uint8, no need to multiply by 255
59
  im = Image.fromarray(detected_image)
60
  text = mocr(im)
 
68
  })
69
  texts_to_translate.append(text)
70
 
71
+ # Auto font matching: analyze first bubble and select best font
72
+ # Note: font is now determined BEFORE processing, passed as selected_font
73
+ # (Analysis moved to upload_file to only run once per batch)
74
+
75
+ # Phase 2: Batch translate
76
  if selected_translator == "gemini" and len(texts_to_translate) > 1:
77
  # Use batch translation for Gemini
78
  try:
79
  if manga_translator._gemini_translator is None:
80
  from translator.gemini_translator import GeminiTranslator
81
+ api_key = getattr(manga_translator, '_gemini_api_key', None)
82
+ if not api_key:
83
+ raise ValueError("Gemini API key not provided")
84
  custom_prompt = getattr(manga_translator, '_gemini_custom_prompt', None)
85
  manga_translator._gemini_translator = GeminiTranslator(
86
  api_key=api_key,
 
95
  except Exception as e:
96
  print(f"Batch translation failed, falling back to single: {e}")
97
  translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
98
+
99
+ elif selected_translator == "copilot" and len(texts_to_translate) > 1:
100
+ # Use batch translation for Copilot
101
+ try:
102
+ if not hasattr(manga_translator, '_copilot_translator') or manga_translator._copilot_translator is None:
103
+ from translator.copilot_translator import CopilotTranslator
104
+ copilot_server = getattr(manga_translator, '_copilot_server', 'http://localhost:8080')
105
+ copilot_model = getattr(manga_translator, '_copilot_model', 'gpt-4o')
106
+ manga_translator._copilot_translator = CopilotTranslator(
107
+ server_url=copilot_server,
108
+ model=copilot_model
109
+ )
110
+ print(f"Copilot translator initialized: {copilot_server} / {copilot_model}")
111
+
112
+ translated_texts = manga_translator._copilot_translator.translate_batch(
113
+ texts_to_translate,
114
+ source=manga_translator.source,
115
+ target=manga_translator.target
116
+ )
117
+ except Exception as e:
118
+ print(f"Copilot batch translation failed: {e}")
119
+ translated_texts = texts_to_translate # Return original on error
120
+
121
  else:
122
  # Single translation for other translators
123
  translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
124
 
125
  # Phase 3: Add translated text to bubbles
126
+ # Determine correct font path based on font name
127
+ font_path = get_font_path(selected_font)
128
  for data, translated_text in zip(bubble_data, translated_texts):
129
  add_text(data['detected_image'], translated_text, font_path, data['contour'])
130
 
131
  return image
132
 
133
 
134
+ def get_font_path(font_name: str) -> str:
135
+ """Get the correct font file path based on font name."""
136
+ # Handle legacy fonts with 'i' suffix
137
+ if font_name in ["animeace_", "arial", "mangat"]:
138
+ return f"fonts/{font_name}i.ttf"
139
+ # Yuki-* fonts use exact name
140
+ elif font_name.startswith("Yuki-") or font_name.startswith("yuki-"):
141
+ return f"fonts/{font_name}.ttf"
142
+ else:
143
+ return f"fonts/{font_name}.ttf"
144
+
145
+
146
+ def process_images_with_batch(images_data, manga_translator, mocr, selected_font, translator_type, batch_size=10, use_context_memory=True):
147
+ """
148
+ Process multiple images with multi-page batching for Copilot or Gemini.
149
+ Collects all texts first, batch translates, then applies translations.
150
+
151
+ Args:
152
+ images_data: List of dicts with 'image', 'name' keys
153
+ manga_translator: MangaTranslator instance with translator
154
+ mocr: OCR engine
155
+ selected_font: Font to use
156
+ translator_type: 'copilot' or 'gemini'
157
+ batch_size: Number of pages per API call
158
+ use_context_memory: Whether to include context from all pages for better translation
159
+
160
+ Returns:
161
+ List of processed images with translations applied
162
+ """
163
+ import time
164
+ from concurrent.futures import ThreadPoolExecutor, as_completed
165
+
166
+ def emit_progress(phase, current, total, message):
167
+ """Emit progress update via WebSocket."""
168
+ try:
169
+ socketio.emit('progress', {
170
+ 'phase': phase,
171
+ 'current': current,
172
+ 'total': total,
173
+ 'message': message,
174
+ 'percent': int((current / max(total, 1)) * 100)
175
+ })
176
+ except Exception as e:
177
+ pass # Silently fail if socket not connected
178
+
179
+ total_images = len(images_data)
180
+ print(f"\n{'='*50}")
181
+ print(f"Processing {total_images} images...")
182
+ print(f"Context Memory: {'ON' if use_context_memory else 'OFF'}")
183
+ print(f"{'='*50}")
184
+
185
+ start_time = time.time()
186
+
187
+ # Check if using Chrome Lens OCR (has batch support)
188
+ use_batch_ocr = hasattr(mocr, 'process_batch')
189
+
190
+ # Phase 1a: Detect bubbles and collect all bubble images
191
+ print("\n[Phase 1] Detecting bubbles...")
192
+ emit_progress('detection', 0, total_images, 'Bắt đầu phát hiện speech bubbles...')
193
+ all_pages_data = {} # {page_name: {'image': img, 'bubbles': [...], 'bubble_images': [...]}}
194
+ all_bubble_images = [] # Flat list for batch OCR
195
+ bubble_mapping = [] # [(page_name, bubble_idx), ...] to map back
196
+
197
+ for idx, img_data in enumerate(images_data):
198
+ image = img_data['image']
199
+ name = img_data['name']
200
+
201
+ emit_progress('detection', idx + 1, total_images, f'Phát hiện bubbles: {name}')
202
+ print(f" [{idx+1}/{total_images}] {name}", end="", flush=True)
203
+
204
+ results = detect_bubbles(MODEL_PATH, image)
205
+ if not results:
206
+ all_pages_data[name] = {'image': image, 'bubbles': [], 'texts': []}
207
+ print(f" - 0 bubbles")
208
+ continue
209
+
210
+ print(f" - {len(results)} bubbles")
211
+
212
+ bubble_data = []
213
+
214
+ for bubble_idx, result in enumerate(results):
215
+ x1, y1, x2, y2, score, class_id = result
216
+ detected_image = image[int(y1):int(y2), int(x1):int(x2)]
217
+
218
+ # IMPORTANT: Add to OCR queue BEFORE processing (which fills white)
219
+ all_bubble_images.append(Image.fromarray(detected_image.copy()))
220
+ bubble_mapping.append((name, bubble_idx))
221
+
222
+ # Process bubble (fill white) - this modifies the original image via view
223
+ processed_image, cont = process_bubble(detected_image)
224
+
225
+ bubble_data.append({
226
+ 'detected_image': processed_image,
227
+ 'contour': cont,
228
+ 'coords': (int(x1), int(y1), int(x2), int(y2))
229
+ })
230
+
231
+ all_pages_data[name] = {
232
+ 'image': image,
233
+ 'bubbles': bubble_data,
234
+ 'texts': [] # Will fill after OCR
235
+ }
236
+
237
+ detection_time = time.time() - start_time
238
+ print(f"✓ Bubble detection completed in {detection_time:.1f}s ({len(all_bubble_images)} total bubbles)")
239
+ emit_progress('detection', total_images, total_images, f'Phát hiện xong {len(all_bubble_images)} bubbles')
240
+
241
+ # Phase 1b: Batch OCR all bubbles at once
242
+ if all_bubble_images:
243
+ ocr_start = time.time()
244
+ emit_progress('ocr', 0, 1, f'Đang OCR {len(all_bubble_images)} bubbles...')
245
+ print(f"\n[Phase 2] OCR processing {len(all_bubble_images)} bubbles...", end=" ", flush=True)
246
+
247
+ if use_batch_ocr:
248
+ # Use concurrent batch OCR (Chrome Lens)
249
+ all_texts = mocr.process_batch(all_bubble_images)
250
+ else:
251
+ # Sequential OCR (MangaOcr or others)
252
+ all_texts = [mocr(img) for img in all_bubble_images]
253
+
254
+ # Map texts back to pages
255
+ for (page_name, bubble_idx), text in zip(bubble_mapping, all_texts):
256
+ all_pages_data[page_name]['texts'].append(text)
257
+
258
+ ocr_time = time.time() - ocr_start
259
+ print(f"({ocr_time:.1f}s)")
260
+ print(f"✓ OCR completed in {ocr_time:.1f}s ({len(all_bubble_images)/ocr_time:.1f} bubbles/sec)")
261
+ emit_progress('ocr', 1, 1, f'OCR hoàn tất ({len(all_bubble_images)} bubbles)')
262
+
263
+ # Phase 3: Batch translate all pages together
264
+ emit_progress('translation', 0, 1, 'Đang dịch...')
265
+ pages_texts = {name: data['texts'] for name, data in all_pages_data.items() if data['texts']}
266
+ all_translations = {}
267
+
268
+ if pages_texts:
269
+ # Get the translator based on type
270
+ if translator_type == "copilot" and hasattr(manga_translator, '_copilot_translator') and manga_translator._copilot_translator:
271
+ translator = manga_translator._copilot_translator
272
+ translator_name = "Copilot"
273
+ elif translator_type == "gemini" and hasattr(manga_translator, '_gemini_translator') and manga_translator._gemini_translator:
274
+ translator = manga_translator._gemini_translator
275
+ translator_name = "Gemini"
276
+ else:
277
+ translator = None
278
+ translator_name = "Unknown"
279
+
280
+ if translator:
281
+ print(f"{translator_name} batch translating {len(pages_texts)} pages in chunks of {batch_size}...")
282
+
283
+ # Build full context from ALL pages if context memory is enabled
284
+ all_context = None
285
+ if use_context_memory:
286
+ all_context = pages_texts # Pass all texts for context
287
+ print(f" Using context from all {len(pages_texts)} pages")
288
+
289
+ # Process in batches
290
+ page_names = list(pages_texts.keys())
291
+
292
+ for i in range(0, len(page_names), batch_size):
293
+ batch_names = page_names[i:i + batch_size]
294
+ batch_texts = {name: pages_texts[name] for name in batch_names}
295
+
296
+ print(f" Translating batch {i//batch_size + 1}: pages {i+1}-{min(i+batch_size, len(page_names))}")
297
+
298
+ try:
299
+ translated = translator.translate_pages_batch(
300
+ batch_texts,
301
+ source=manga_translator.source,
302
+ target=manga_translator.target,
303
+ context=all_context if use_context_memory else None
304
+ )
305
+ all_translations.update(translated)
306
+ except Exception as e:
307
+ print(f" Batch failed: {e}, falling back to individual translation")
308
+ for name, texts in batch_texts.items():
309
+ try:
310
+ all_translations[name] = translator.translate_batch(
311
+ texts, manga_translator.source, manga_translator.target
312
+ )
313
+ except:
314
+ all_translations[name] = texts # Return original on error
315
+
316
+ translation_time = time.time() - start_time - detection_time
317
+ print(f"✓ Translation completed in {translation_time:.1f}s")
318
+ emit_progress('translation', 1, 1, 'Dịch hoàn tất')
319
+
320
+ # Phase 4: Apply translations and render text
321
+ emit_progress('rendering', 0, total_images, 'Đang render text vào ảnh...')
322
+ render_start = time.time()
323
+ processed_results = []
324
+ font_path = get_font_path(selected_font)
325
+
326
+ print(f"\n[Phase 4] Rendering text...")
327
+
328
+ render_idx = 0
329
+ for name, data in all_pages_data.items():
330
+ render_idx += 1
331
+ emit_progress('rendering', render_idx, total_images, f'Render text: {name}')
332
+
333
+ image = data['image']
334
+ bubbles = data['bubbles']
335
+ translated_texts = all_translations.get(name, data['texts']) # Fallback to original
336
+
337
+ # Apply text to bubbles on the ORIGINAL image
338
+ for bubble, text in zip(bubbles, translated_texts):
339
+ x1, y1, x2, y2 = bubble['coords']
340
+ # Get the region in the original image (this is a view, modifications affect original)
341
+ bubble_region = image[y1:y2, x1:x2]
342
+ # Fill with white first (process_bubble already did this but let's be safe)
343
+ # bubble_region[:] = (255, 255, 255) # Already done
344
+ # Add translated text
345
+ add_text(bubble_region, text, font_path, bubble['contour'])
346
+
347
+ processed_results.append({
348
+ 'image': image,
349
+ 'name': name
350
+ })
351
+
352
+ render_time = time.time() - render_start
353
+ total_time = time.time() - start_time
354
+
355
+ print(f"✓ Text rendering completed in {render_time:.1f}s")
356
+ print(f"{'='*50}")
357
+ print(f"✓ TOTAL: {total_images} images processed in {total_time:.1f}s ({total_time/total_images:.1f}s/image)")
358
+ print(f"{'='*50}\n")
359
+
360
+ emit_progress('done', total_images, total_images, f'Hoàn tất! {total_images} ảnh trong {total_time:.1f}s')
361
+
362
+ return processed_results
363
+
364
+
365
  @app.route("/translate", methods=["POST"])
366
  def upload_file():
367
  # Get translator selection
368
  translator_map = {
369
  "Opus-mt model": "hf",
370
  "NLLB": "nllb",
371
+ "Gemini": "gemini",
372
+ "Copilot": "copilot"
373
  }
374
  selected_translator = translator_map.get(
375
  request.form["selected_translator"],
376
  request.form["selected_translator"].lower()
377
  )
378
+
379
+ # Get Copilot settings if Copilot is selected
380
+ copilot_server = request.form.get("copilot_server", "http://localhost:8080")
381
+ copilot_model = request.form.get("selected_copilot_model", "gpt-4o")
382
+
383
+ # Get Gemini API key from form
384
+ gemini_api_key = request.form.get("gemini_api_key", "").strip()
385
+
386
+ # Get context memory setting (checkbox - "on" if checked, None if not)
387
+ use_context_memory = request.form.get("context_memory") == "on"
388
 
389
  # Get font selection
390
+ selected_font_raw = request.form["selected_font"]
391
+ selected_font = selected_font_raw.lower()
392
+
393
+ # Handle special font name mappings
394
+ if selected_font == "auto (match original)":
395
+ selected_font = "auto"
396
+ elif selected_font == "animeace":
397
+ selected_font = "animeace_"
398
+ elif selected_font_raw.startswith("Yuki-"):
399
+ # Keep original case for Yuki fonts
400
+ selected_font = selected_font_raw
401
 
402
  # Get OCR engine
403
  selected_ocr = request.form.get("selected_ocr", "chrome-lens").lower()
 
460
  if selected_translator == "gemini" and style:
461
  manga_translator._gemini_custom_prompt = style
462
 
463
+ # Set Gemini API key
464
+ if selected_translator == "gemini" and gemini_api_key:
465
+ manga_translator._gemini_api_key = gemini_api_key
466
+ print(f"Using Gemini API with provided key")
467
+
468
+ # Set Copilot settings
469
+ if selected_translator == "copilot":
470
+ manga_translator._copilot_server = copilot_server
471
+ manga_translator._copilot_model = copilot_model
472
+ print(f"Using Copilot API: {copilot_server} / model: {copilot_model}")
473
+
474
  if selected_ocr == "chrome-lens":
475
  mocr = ChromeLensOCR()
476
  else:
477
  mocr = MangaOcr()
478
 
479
+ # Initialize font analyzer for auto font matching
480
+ font_analyzer = None
481
+ if selected_font == "auto":
482
+ try:
483
+ from font_analyzer import FontAnalyzer
484
+ # Use same API key as Gemini translator
485
+ api_key = gemini_api_key or os.environ.get("GEMINI_API_KEY")
486
+ if not api_key:
487
+ print("Warning: No Gemini API key provided for font analysis")
488
+ font_analyzer = FontAnalyzer(api_key=api_key)
489
+ print("Font analyzer initialized for auto font matching")
490
+ except Exception as e:
491
+ print(f"Failed to initialize font analyzer: {e}")
492
+ selected_font = "animeace_" # Fallback to default
493
+
494
  # Process all images
495
  processed_images = []
496
+ auto_font_determined = False # Flag to analyze font only once
497
 
498
+ # For Copilot and Gemini: Use multi-page batch processing
499
+ if selected_translator in ["copilot", "gemini"]:
500
+ # First, read all images into memory
501
+ all_images = []
502
+ for file in files:
503
+ if file and file.filename:
504
+ try:
505
+ file_stream = file.stream
506
+ file_bytes = np.frombuffer(file_stream.read(), dtype=np.uint8)
507
+ image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
508
+
509
+ if image is None:
510
+ continue
511
+
512
+ name = os.path.splitext(file.filename)[0]
513
+ all_images.append({'image': image, 'name': name})
514
+ except Exception as e:
515
+ print(f"Error reading {file.filename}: {e}")
516
+
517
+ if not all_images:
518
+ return redirect("/")
519
+
520
+ # Auto font: analyze first image
521
+ if selected_font == "auto" and font_analyzer is not None:
522
  try:
523
+ results = detect_bubbles(MODEL_PATH, all_images[0]['image'])
524
+ if results:
525
+ x1, y1, x2, y2, _, _ = results[0]
526
+ first_bubble = all_images[0]['image'][int(y1):int(y2), int(x1):int(x2)]
527
+ selected_font = font_analyzer.analyze_and_match(first_bubble)
528
+ print(f"Auto font matched: {selected_font}")
529
+ else:
530
+ selected_font = "animeace_"
531
+ except Exception as e:
532
+ print(f"Font analysis failed: {e}")
533
+ selected_font = "animeace_"
534
+
535
+ # Initialize translator based on type
536
+ if selected_translator == "copilot":
537
+ if not hasattr(manga_translator, '_copilot_translator') or manga_translator._copilot_translator is None:
538
+ from translator.copilot_translator import CopilotTranslator
539
+ manga_translator._copilot_translator = CopilotTranslator(
540
+ server_url=copilot_server,
541
+ model=copilot_model
542
  )
543
+ print(f"Copilot translator initialized: {copilot_server} / {copilot_model}")
544
+
545
+ elif selected_translator == "gemini":
546
+ if not hasattr(manga_translator, '_gemini_translator') or manga_translator._gemini_translator is None:
547
+ from translator.gemini_translator import GeminiTranslator
548
+ api_key = gemini_api_key
549
+ if not api_key:
550
+ raise ValueError("Gemini API key required. Please enter it in the web form.")
551
+ custom_prompt = getattr(manga_translator, '_gemini_custom_prompt', None)
552
+ manga_translator._gemini_translator = GeminiTranslator(
553
+ api_key=api_key,
554
+ custom_prompt=custom_prompt
555
+ )
556
+ print("Gemini translator initialized for multi-page batching")
557
+
558
+ # Process with multi-page batching (10 pages per API call)
559
+ processed_results = process_images_with_batch(
560
+ all_images, manga_translator, mocr, selected_font,
561
+ translator_type=selected_translator, batch_size=10,
562
+ use_context_memory=use_context_memory
563
+ )
564
+
565
+ # Encode results to base64
566
+ for result in processed_results:
567
+ try:
568
+ _, buffer = cv2.imencode(".jpg", result['image'], [cv2.IMWRITE_JPEG_QUALITY, 95])
569
  encoded_image = base64.b64encode(buffer.tobytes()).decode("utf-8")
 
570
  processed_images.append({
571
+ "name": result['name'],
572
  "data": encoded_image
573
  })
 
574
  except Exception as e:
575
+ print(f"Error encoding {result['name']}: {e}")
576
+
577
+ else:
578
+ # For other translators: Use per-image processing (original flow)
579
+ for file in files:
580
+ if file and file.filename:
581
+ try:
582
+ # Read image
583
+ file_stream = file.stream
584
+ file_bytes = np.frombuffer(file_stream.read(), dtype=np.uint8)
585
+ image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
586
+
587
+ if image is None:
588
+ continue
589
+
590
+ # Auto font: analyze FIRST image only
591
+ if selected_font == "auto" and font_analyzer is not None and not auto_font_determined:
592
+ try:
593
+ results = detect_bubbles(MODEL_PATH, image)
594
+ if results:
595
+ x1, y1, x2, y2, _, _ = results[0]
596
+ first_bubble = image[int(y1):int(y2), int(x1):int(x2)]
597
+ selected_font = font_analyzer.analyze_and_match(first_bubble)
598
+ print(f"Auto font matched (once for all images): {selected_font}")
599
+ else:
600
+ selected_font = "animeace_"
601
+ except Exception as e:
602
+ print(f"Font analysis failed: {e}")
603
+ selected_font = "animeace_"
604
+ auto_font_determined = True
605
+
606
+ # Get original filename
607
+ name = os.path.splitext(file.filename)[0]
608
+
609
+ # Process image
610
+ processed_image = process_single_image(
611
+ image, manga_translator, mocr,
612
+ selected_translator, selected_font, None
613
+ )
614
+
615
+ # Encode to base64
616
+ _, buffer = cv2.imencode(".jpg", processed_image, [cv2.IMWRITE_JPEG_QUALITY, 95])
617
+ encoded_image = base64.b64encode(buffer.tobytes()).decode("utf-8")
618
+
619
+ processed_images.append({
620
+ "name": name,
621
+ "data": encoded_image
622
+ })
623
+
624
+ except Exception as e:
625
+ print(f"Error processing {file.filename}: {e}")
626
+ continue
627
 
628
  if not processed_images:
629
  return redirect("/")
 
631
  return render_template("translate.html", images=processed_images)
632
 
633
 
634
+ @app.route("/download-zip", methods=["POST"])
635
+ def download_zip():
636
+ """Create and download a ZIP file containing all translated images."""
637
+ try:
638
+ images_data = request.form.get("images_data", "[]")
639
+ images = json.loads(images_data)
640
+
641
+ if not images:
642
+ return redirect("/")
643
+
644
+ # Create ZIP file in memory
645
+ zip_buffer = io.BytesIO()
646
+ with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
647
+ for i, img in enumerate(images):
648
+ name = img.get('name', f'image_{i+1}')
649
+ data = img.get('data', '')
650
+
651
+ # Decode base64 to bytes
652
+ image_bytes = base64.b64decode(data)
653
+
654
+ # Add to ZIP with proper filename
655
+ filename = f"{name}_translated.png"
656
+ zip_file.writestr(filename, image_bytes)
657
+
658
+ zip_buffer.seek(0)
659
+
660
+ return send_file(
661
+ zip_buffer,
662
+ mimetype='application/zip',
663
+ as_attachment=True,
664
+ download_name='manga_translated.zip'
665
+ )
666
+
667
+ except Exception as e:
668
+ print(f"Error creating ZIP: {e}")
669
+ return redirect("/")
670
+
671
+
672
  if __name__ == "__main__":
673
+ socketio.run(app, debug=True)
font_analyzer.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Font Analyzer - Analyze manga font style and match with available fonts
3
+ Uses Gemini Vision to directly select the best matching font from available options
4
+ """
5
+ import google.generativeai as genai
6
+ import json
7
+ import os
8
+ from PIL import Image
9
+ import numpy as np
10
+ from typing import Optional, Dict, Any, List
11
+
12
+
13
class FontAnalyzer:
    """
    Analyzes font style from manga speech bubbles using Gemini Vision
    and directly selects the best matching font from available fonts.
    """

    # Available fonts with descriptions for Gemini to understand.
    FONT_OPTIONS = {
        "animeace_": "Classic manga font, clean and readable, standard comic style",
        "mangat": "Standard manga font, similar to animeace, good readability",
        "arial": "Clean sans-serif, formal and professional",
        "Yuki-Arenzi": "Simple casual handwritten style",
        "Yuki-Burobu": "Bold brush strokes, dynamic action style, Japanese brush feel",
        "Yuki-CCMarianChurchlandJournal": "Journal/diary handwritten, personal feel",
        "Yuki-CDX Starstreak": "Dynamic sci-fi style, bold and futuristic",
        "Yuki-CHICKEN Pie": "Playful, chunky, cute comedy style",
        "Yuki-CrashLanding BB": "Heavy impact font, bold action/shouting style",
        "Yuki-Downhill Dive": "Dynamic sports/action font, energetic",
        "Yuki-Gingerline DEMO Regular": "Elegant flowing handwritten, romantic style",
        "Yuki-Gorrilaz_Story": "Grunge alternative style, rough edges",
        "Yuki-KG Only Angel": "Delicate feminine handwritten, soft romantic",
        "Yuki-LF SwandsHand": "Natural handwritten, casual personal",
        "Yuki-La Belle Aurore": "Elegant cursive, fancy romantic style",
        "Yuki-Little Cupcakes": "Cute kawaii style, bubbly and fun",
        "Yuki-Nagurigaki Crayon": "Crayon/childish handwritten, playful comedy",
        "Yuki-Ripsnort BB": "Heavy bold impact, action/shouting",
        "Yuki-Roasthink": "Modern clean sans-serif, general purpose",
        "Yuki-Screwball": "Comic style, funny and expressive",
        "Yuki-Shark Crash": "Aggressive dynamic, action manga style",
        "Yuki-Skulduggery": "Gothic dark style, horror/mystery",
        "Yuki-Superscratchy": "Scratchy rough handwritten, grungy feel",
        "Yuki-Tea And Oranges Regular": "Soft warm handwritten, gentle drama",
    }

    # Fallback used whenever analysis fails or the reply is unusable.
    DEFAULT_FONT = "animeace_"

    def __init__(self, api_key: str = None):
        """Initialize with a Gemini API key (falls back to GEMINI_API_KEY env var).

        Raises:
            ValueError: If no API key is supplied or found in the environment.
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
        if not self.api_key:
            raise ValueError("Gemini API key required. Set GEMINI_API_KEY or pass api_key.")

        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel("gemini-2.5-flash-lite")

    def _image_to_pil(self, image) -> "Image.Image":
        """Convert a PIL Image or numpy array (assumed BGR for 3-channel) to PIL RGB.

        Raises:
            ValueError: For unsupported input types.
        """
        if isinstance(image, Image.Image):
            return image
        elif isinstance(image, np.ndarray):
            import cv2
            # 3-channel arrays are assumed to be OpenCV BGR — convert to RGB.
            if len(image.shape) == 3 and image.shape[2] == 3:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            return Image.fromarray(image)
        else:
            raise ValueError(f"Unsupported image type: {type(image)}")

    def _build_font_list_prompt(self) -> str:
        """Build the "- name: description" font list used inside the prompt."""
        lines = []
        for font_name, description in self.FONT_OPTIONS.items():
            lines.append(f"- {font_name}: {description}")
        return "\n".join(lines)

    def _match_font_name(self, result: str) -> str:
        """Map a raw model reply to a known font name.

        Cleans quotes and common verbose prefixes, then tries exact,
        case-insensitive, and finally partial matching against
        FONT_OPTIONS. Falls back to DEFAULT_FONT when nothing matches.
        """
        # Clean up response
        result = result.replace('"', '').replace("'", "").strip()

        # Remove common prefixes that Gemini might add
        prefixes_to_remove = ["The best matching font is ", "Best match: ", "Font: ", "I recommend "]
        for prefix in prefixes_to_remove:
            if result.lower().startswith(prefix.lower()):
                result = result[len(prefix):].strip()

        print(f"[FontAnalyzer] Cleaned response: '{result}'")

        # BUGFIX: an empty reply would "partially match" every font
        # (`"" in name` is always True) and return the first dict entry.
        if not result:
            print("[FontAnalyzer] ✗ Empty response, using default")
            return self.DEFAULT_FONT

        # Exact match first.
        if result in self.FONT_OPTIONS:
            print(f"[FontAnalyzer] ✓ Matched: {result}")
            return result

        result_lower = result.lower()

        # Case-insensitive exact match in its own pass, so an earlier
        # partial match can never shadow a later exact one.
        for font_name in self.FONT_OPTIONS:
            if font_name.lower() == result_lower:
                print(f"[FontAnalyzer] ✓ Matched (case-insensitive): {font_name}")
                return font_name

        # Partial (substring) match as a last resort.
        for font_name in self.FONT_OPTIONS:
            if font_name.lower() in result_lower or result_lower in font_name.lower():
                print(f"[FontAnalyzer] ✓ Matched (partial): {font_name}")
                return font_name

        print(f"[FontAnalyzer] ✗ Font not in list: '{result}', using default")
        return self.DEFAULT_FONT

    def analyze_and_match(self, bubble_image) -> str:
        """
        Analyze the font in the image and directly select the best matching font.

        Args:
            bubble_image: Speech bubble image (PIL, numpy array)

        Returns:
            Font name to use (DEFAULT_FONT on any failure)
        """
        try:
            pil_image = self._image_to_pil(bubble_image)
            print(f"[FontAnalyzer] Analyzing image size: {pil_image.size}")

            font_list = self._build_font_list_prompt()

            prompt = f"""Look at this manga/comic speech bubble image and analyze the text font style.

Then choose the BEST matching font from this list based on visual similarity:

{font_list}

Consider these factors when matching:
1. Font weight (thin, normal, bold, heavy)
2. Style (clean, handwritten, decorative, brush)
3. Mood/genre (action, comedy, romance, horror, drama, casual)
4. Overall visual feel

Return ONLY the font name (exactly as written above), nothing else.
Example response: Yuki-Burobu"""

            print("[FontAnalyzer] Sending request to Gemini Vision...")
            response = self.model.generate_content([prompt, pil_image])
            result = response.text.strip()

            print(f"[FontAnalyzer] Gemini raw response: '{result}'")
            return self._match_font_name(result)

        except Exception as e:
            # Any failure (network, bad image, API quota) degrades to default.
            print(f"[FontAnalyzer] ✗ Error: {e}")
            return self.DEFAULT_FONT
146
+
147
+
148
def get_matching_font(bubble_image, api_key: str = None) -> str:
    """Convenience wrapper: analyze *bubble_image* and return the best-matching font name."""
    return FontAnalyzer(api_key).analyze_and_match(bubble_image)
fonts/Yuki-Arenzi.ttf ADDED
Binary file (47.8 kB). View file
 
fonts/Yuki-Burobu.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a3b22d7035b4726304fb383cf80e2421c47cf05615d2f75143b24147bcef7a
3
+ size 176976
fonts/Yuki-CCMarianChurchlandJournal.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5c4ac2b3daf8f7062d745300b2e8dd12b2ee206db7dd427143cc3f78a8e831
3
+ size 148928
fonts/Yuki-CDX Starstreak.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:931d143968eca5b237efdba4538ddd79a8113a438c9d0b479244a660cc099973
3
+ size 152740
fonts/Yuki-CHICKEN Pie.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b1128c44ecc4819fc67615966260cca27ac68a6b21f4c2a99d697656f3cfe2
3
+ size 100624
fonts/Yuki-CrashLanding BB.ttf ADDED
Binary file (49.4 kB). View file
 
fonts/Yuki-Downhill Dive.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a19185b9489d1ff3897d178e7859676d58c1a8ab81beee9e93b662a1a8a0383d
3
+ size 345480
fonts/Yuki-Gingerline DEMO Regular.ttf ADDED
Binary file (82.5 kB). View file
 
fonts/Yuki-Gorrilaz_Story.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:331cec198091d01a819b0dfb4be4576cdc27a0774397a4ec7a9b10a527a5d161
3
+ size 115792
fonts/Yuki-KG Only Angel.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f07ee1dfc8c198e19dfe1101b2bb1d84c80592ed54fc6caf2412e50d36b22903
3
+ size 440976
fonts/Yuki-LF SwandsHand.ttf ADDED
Binary file (70.9 kB). View file
 
fonts/Yuki-La Belle Aurore.ttf ADDED
Binary file (88.9 kB). View file
 
fonts/Yuki-Little Cupcakes.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30a1f57bac5c5fcb5739008d33ede51e5ecc8a76a39f268ecbeb4b0c0e45fa68
3
+ size 114520
fonts/Yuki-Nagurigaki Crayon.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bece77302985b034d3e7beff562853e17084b87d2b2fef6ba784fdc953660586
3
+ size 5462384
fonts/Yuki-Ripsnort BB.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf74ab38ba5767007fbc4b0cf8cfa432620a22d431be3d4b944df1dd3ca1b2f3
3
+ size 115368
fonts/Yuki-Roasthink.ttf ADDED
Binary file (68.4 kB). View file
 
fonts/Yuki-Screwball.ttf ADDED
Binary file (99.1 kB). View file
 
fonts/Yuki-Shark Crash.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12195563cabdc781dffa6517ad1f029fc69dcb968e9ff49ec68f3c7216cc4c3c
3
+ size 148464
fonts/Yuki-Skulduggery.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06cbbcaa3cfcbf96482c18301130e48405c06ff205c5a225abb44d5b56f7d299
3
+ size 434812
fonts/Yuki-Superscratchy.ttf ADDED
Binary file (68.4 kB). View file
 
fonts/Yuki-Tea And Oranges Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a448eb8e6301e6059678171d36c95049723d6a3d26fcb4d33b9b62e40397df9
3
+ size 492108
ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc CHANGED
Binary files a/ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc and b/ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc differ
 
ocr/chrome_lens_ocr.py CHANGED
@@ -17,6 +17,7 @@ class ChromeLensOCR:
17
  - Free Google Lens OCR API
18
  - Multi-language support with auto-detection
19
  - Text block segmentation for comics/manga
 
20
  """
21
 
22
  def __init__(self, ocr_language: str = "ja"):
@@ -77,6 +78,62 @@ class ChromeLensOCR:
77
  print(f"Chrome Lens OCR error: {e}")
78
  return ""
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  async def process_with_blocks(self, image) -> dict:
81
  """
82
  Process image and return text segmented into blocks.
@@ -114,3 +171,4 @@ class ChromeLensOCR:
114
 
115
  result = asyncio.run(self.process_with_blocks(image))
116
  return result.get("text_blocks", [])
 
 
17
  - Free Google Lens OCR API
18
  - Multi-language support with auto-detection
19
  - Text block segmentation for comics/manga
20
+ - Batch processing for faster multi-image OCR
21
  """
22
 
23
  def __init__(self, ocr_language: str = "ja"):
 
78
  print(f"Chrome Lens OCR error: {e}")
79
  return ""
80
 
81
+ def process_batch(self, images: list) -> list:
82
+ """
83
+ Process multiple images concurrently for faster OCR.
84
+
85
+ Args:
86
+ images: List of PIL Images or numpy arrays
87
+
88
+ Returns:
89
+ list: List of extracted texts in same order
90
+ """
91
+ # Convert numpy arrays to PIL Images
92
+ pil_images = []
93
+ for img in images:
94
+ if isinstance(img, np.ndarray):
95
+ pil_images.append(Image.fromarray(img))
96
+ else:
97
+ pil_images.append(img)
98
+
99
+ # Run batch processing
100
+ try:
101
+ loop = asyncio.get_running_loop()
102
+ import concurrent.futures
103
+ future = asyncio.run_coroutine_threadsafe(
104
+ self._process_batch(pil_images), loop
105
+ )
106
+ return future.result(timeout=120)
107
+ except RuntimeError:
108
+ if not hasattr(self, '_loop') or self._loop.is_closed():
109
+ self._loop = asyncio.new_event_loop()
110
+ return self._loop.run_until_complete(self._process_batch(pil_images))
111
+
112
+ async def _process_batch(self, images: list) -> list:
113
+ """
114
+ Async batch processing using asyncio.gather for concurrent OCR.
115
+
116
+ Args:
117
+ images: List of PIL Images
118
+
119
+ Returns:
120
+ list: List of extracted texts
121
+ """
122
+ # Process all images concurrently
123
+ tasks = [self._process(img) for img in images]
124
+ results = await asyncio.gather(*tasks, return_exceptions=True)
125
+
126
+ # Handle any exceptions
127
+ processed = []
128
+ for r in results:
129
+ if isinstance(r, Exception):
130
+ print(f"Batch OCR error: {r}")
131
+ processed.append("")
132
+ else:
133
+ processed.append(r)
134
+
135
+ return processed
136
+
137
  async def process_with_blocks(self, image) -> dict:
138
  """
139
  Process image and return text segmented into blocks.
 
171
 
172
  result = asyncio.run(self.process_with_blocks(image))
173
  return result.get("text_blocks", [])
174
+
process_bubble.py CHANGED
@@ -11,12 +11,22 @@ def process_bubble(image):
11
 
12
  Returns:
13
  - image (numpy.ndarray): Image with the speech bubble content set to white.
14
- - largest_contour (numpy.ndarray): Contour of the detected speech bubble.
15
  """
16
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
17
  _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
18
 
19
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
 
 
 
 
 
 
 
 
 
20
  largest_contour = max(contours, key=cv2.contourArea)
21
 
22
  mask = np.zeros_like(gray)
@@ -25,3 +35,4 @@ def process_bubble(image):
25
  image[mask == 255] = (255, 255, 255)
26
 
27
  return image, largest_contour
 
 
11
 
12
  Returns:
13
  - image (numpy.ndarray): Image with the speech bubble content set to white.
14
+ - largest_contour (numpy.ndarray): Contour of the detected speech bubble (or None if not found).
15
  """
16
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
17
  _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
18
 
19
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
20
+
21
+ # Handle case when no contours found
22
+ if not contours:
23
+ # Return original image with a simple rectangular contour
24
+ h, w = image.shape[:2]
25
+ largest_contour = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=np.int32)
26
+ # Fill with white anyway
27
+ image[:] = (255, 255, 255)
28
+ return image, largest_contour
29
+
30
  largest_contour = max(contours, key=cv2.contourArea)
31
 
32
  mask = np.zeros_like(gray)
 
35
  image[mask == 255] = (255, 255, 255)
36
 
37
  return image, largest_contour
38
+
static/css/style.css CHANGED
@@ -334,6 +334,55 @@ button:active {
334
  color: white;
335
  }
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  /* Responsive */
338
  @media (max-width: 600px) {
339
  .form-grid {
@@ -344,3 +393,4 @@ button:active {
344
  padding: 20px;
345
  }
346
  }
 
 
334
  color: white;
335
  }
336
 
337
+ /* Toggle Switch */
338
+ .toggle-container {
339
+ display: flex;
340
+ align-items: center;
341
+ cursor: pointer;
342
+ gap: 12px;
343
+ user-select: none;
344
+ }
345
+
346
+ .toggle-container input {
347
+ display: none;
348
+ }
349
+
350
+ .toggle-slider {
351
+ position: relative;
352
+ width: 50px;
353
+ height: 26px;
354
+ background-color: #ccc;
355
+ border-radius: 26px;
356
+ transition: background-color 0.3s;
357
+ flex-shrink: 0;
358
+ }
359
+
360
+ .toggle-slider::before {
361
+ content: '';
362
+ position: absolute;
363
+ width: 22px;
364
+ height: 22px;
365
+ border-radius: 50%;
366
+ background-color: white;
367
+ top: 2px;
368
+ left: 2px;
369
+ transition: transform 0.3s;
370
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
371
+ }
372
+
373
+ .toggle-container input:checked + .toggle-slider {
374
+ background-color: #5E1675;
375
+ }
376
+
377
+ .toggle-container input:checked + .toggle-slider::before {
378
+ transform: translateX(24px);
379
+ }
380
+
381
+ .toggle-label {
382
+ font-size: 13px;
383
+ color: #333;
384
+ }
385
+
386
  /* Responsive */
387
  @media (max-width: 600px) {
388
  .form-grid {
 
393
  padding: 20px;
394
  }
395
  }
396
+
static/js/app.js CHANGED
@@ -38,6 +38,23 @@ document.addEventListener("DOMContentLoaded", () => {
38
  customWrapper.style.display = 'none';
39
  }
40
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  });
42
  });
43
 
@@ -49,6 +66,33 @@ document.addEventListener("DOMContentLoaded", () => {
49
  }
50
  });
51
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  });
53
 
54
  // Handles multiple file upload change event
@@ -108,6 +152,17 @@ function updateHiddenInputs() {
108
  document.getElementById("selected_style").value = getSelectedText("style");
109
  document.getElementById("selected_font").value = getSelectedText("font");
110
  document.getElementById("selected_ocr").value = getSelectedText("ocr");
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  // Check if files are selected
113
  const files = document.getElementById('file-upload').files;
 
38
  customWrapper.style.display = 'none';
39
  }
40
  }
41
+
42
+ // Show/hide translator-specific settings
43
+ if (selectBox.id === 'translator') {
44
+ const copilotSettings = document.getElementById('copilot-settings');
45
+ const geminiSettings = document.getElementById('gemini-settings');
46
+
47
+ if (option.textContent === 'Copilot') {
48
+ copilotSettings.style.display = 'block';
49
+ geminiSettings.style.display = 'none';
50
+ } else if (option.textContent === 'Gemini') {
51
+ copilotSettings.style.display = 'none';
52
+ geminiSettings.style.display = 'block';
53
+ } else {
54
+ copilotSettings.style.display = 'none';
55
+ geminiSettings.style.display = 'none';
56
+ }
57
+ }
58
  });
59
  });
60
 
 
66
  }
67
  });
68
  });
69
+
70
+ // Load saved Gemini API key from localStorage
71
+ const geminiKeyInput = document.getElementById('gemini_api_key');
72
+ if (geminiKeyInput) {
73
+ const savedKey = localStorage.getItem('gemini_api_key');
74
+ if (savedKey) {
75
+ geminiKeyInput.value = savedKey;
76
+ }
77
+
78
+ // Save to localStorage on input change
79
+ geminiKeyInput.addEventListener('input', () => {
80
+ localStorage.setItem('gemini_api_key', geminiKeyInput.value);
81
+ });
82
+ }
83
+
84
+ // Load saved Copilot server URL from localStorage
85
+ const copilotServerInput = document.getElementById('copilot_server');
86
+ if (copilotServerInput) {
87
+ const savedServer = localStorage.getItem('copilot_server');
88
+ if (savedServer) {
89
+ copilotServerInput.value = savedServer;
90
+ }
91
+
92
+ copilotServerInput.addEventListener('input', () => {
93
+ localStorage.setItem('copilot_server', copilotServerInput.value);
94
+ });
95
+ }
96
  });
97
 
98
  // Handles multiple file upload change event
 
152
  document.getElementById("selected_style").value = getSelectedText("style");
153
  document.getElementById("selected_font").value = getSelectedText("font");
154
  document.getElementById("selected_ocr").value = getSelectedText("ocr");
155
+ document.getElementById("selected_copilot_model").value = getSelectedText("copilot_model");
156
+
157
+ // Validate Gemini API key if Gemini is selected
158
+ const translator = getSelectedText("translator");
159
+ if (translator === 'Gemini') {
160
+ const apiKey = document.getElementById('gemini_api_key').value;
161
+ if (!apiKey || apiKey.trim() === '') {
162
+ alert('Vui lòng nhập Gemini API Key!');
163
+ return false;
164
+ }
165
+ }
166
 
167
  // Check if files are selected
168
  const files = document.getElementById('file-upload').files;
templates/index.html CHANGED
@@ -70,6 +70,7 @@
70
  </div>
71
  <div class="options">
72
  <span class="option">Gemini</span>
 
73
  <span class="option">Google</span>
74
  <span class="option">NLLB</span>
75
  <span class="option">Baidu</span>
@@ -107,9 +108,31 @@
107
  <span class="icon">&#9660;</span>
108
  </div>
109
  <div class="options">
 
110
  <span class="option">Animeace</span>
111
  <span class="option">Mangat</span>
112
  <span class="option">Arial</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  </div>
114
  </div>
115
  </div>
@@ -129,6 +152,15 @@
129
  </div>
130
  </div>
131
 
 
 
 
 
 
 
 
 
 
132
  <!-- Custom Prompt (show when Custom selected) -->
133
  <div class="select-wrapper full-width" id="custom-prompt-wrapper" style="display: none;">
134
  <label class="translator-label">Custom Prompt</label>
@@ -136,6 +168,67 @@
136
  placeholder="Ví dụ: Dịch theo phong cách light novel, giữ nguyên tên nhân vật..." rows="2"></textarea>
137
  </div>
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  <!-- File upload -->
140
  <input id="file-upload" type="file" name="files" accept=".jpg, .jpeg, .png" multiple required>
141
  <label for="file-upload" class="file" id="file-label">
@@ -149,13 +242,68 @@
149
  <input type="hidden" id="selected_style" name="selected_style">
150
  <input type="hidden" id="selected_font" name="selected_font">
151
  <input type="hidden" id="selected_ocr" name="selected_ocr">
 
152
  <button type="submit">Translate</button>
153
  </form>
 
 
 
 
 
 
 
 
 
 
 
 
154
  <img id="loading-img" src="{{ url_for('static', filename='img/loading.gif') }}" alt="">
155
  <p id="loading-p">Đang xử lý... Vui lòng đợi!</p>
156
  </div>
157
 
 
 
158
  <script src="{{ url_for('static', filename='js/app.js') }}"></script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  </body>
160
 
161
  </html>
 
70
  </div>
71
  <div class="options">
72
  <span class="option">Gemini</span>
73
+ <span class="option">Copilot</span>
74
  <span class="option">Google</span>
75
  <span class="option">NLLB</span>
76
  <span class="option">Baidu</span>
 
108
  <span class="icon">&#9660;</span>
109
  </div>
110
  <div class="options">
111
+ <span class="option">Auto (Match Original)</span>
112
  <span class="option">Animeace</span>
113
  <span class="option">Mangat</span>
114
  <span class="option">Arial</span>
115
+ <span class="option">Yuki-Arenzi</span>
116
+ <span class="option">Yuki-Burobu</span>
117
+ <span class="option">Yuki-CCMarianChurchlandJournal</span>
118
+ <span class="option">Yuki-CDX Starstreak</span>
119
+ <span class="option">Yuki-CHICKEN Pie</span>
120
+ <span class="option">Yuki-CrashLanding BB</span>
121
+ <span class="option">Yuki-Downhill Dive</span>
122
+ <span class="option">Yuki-Gingerline DEMO Regular</span>
123
+ <span class="option">Yuki-Gorrilaz_Story</span>
124
+ <span class="option">Yuki-KG Only Angel</span>
125
+ <span class="option">Yuki-LF SwandsHand</span>
126
+ <span class="option">Yuki-La Belle Aurore</span>
127
+ <span class="option">Yuki-Little Cupcakes</span>
128
+ <span class="option">Yuki-Nagurigaki Crayon</span>
129
+ <span class="option">Yuki-Ripsnort BB</span>
130
+ <span class="option">Yuki-Roasthink</span>
131
+ <span class="option">Yuki-Screwball</span>
132
+ <span class="option">Yuki-Shark Crash</span>
133
+ <span class="option">Yuki-Skulduggery</span>
134
+ <span class="option">Yuki-Superscratchy</span>
135
+ <span class="option">Yuki-Tea And Oranges Regular</span>
136
  </div>
137
  </div>
138
  </div>
 
152
  </div>
153
  </div>
154
 
155
+ <!-- Context Memory Toggle -->
156
+ <div class="select-wrapper full-width" style="margin-top: 10px;">
157
+ <label class="toggle-container">
158
+ <input type="checkbox" id="context_memory" name="context_memory" checked>
159
+ <span class="toggle-slider"></span>
160
+ <span class="toggle-label">🧠 Context Memory (dùng context từ tất cả ảnh để dịch chính xác hơn)</span>
161
+ </label>
162
+ </div>
163
+
164
  <!-- Custom Prompt (show when Custom selected) -->
165
  <div class="select-wrapper full-width" id="custom-prompt-wrapper" style="display: none;">
166
  <label class="translator-label">Custom Prompt</label>
 
168
  placeholder="Ví dụ: Dịch theo phong cách light novel, giữ nguyên tên nhân vật..." rows="2"></textarea>
169
  </div>
170
 
171
+ <!-- Copilot Settings (show when Copilot selected) -->
172
+ <div id="copilot-settings" style="display: none; width: 100%;">
173
+ <div class="form-grid">
174
+ <div class="select-wrapper">
175
+ <label class="translator-label">Copilot Server URL</label>
176
+ <input type="text" id="copilot_server" name="copilot_server" value="http://localhost:8080"
177
+ placeholder="http://localhost:8080"
178
+ style="width: 100%; padding: 10px 14px; border: 1px solid #ddd; border-radius: 8px; font-size: 14px;">
179
+ </div>
180
+ <div class="select-wrapper">
181
+ <label class="translator-label">Model</label>
182
+ <div class="custom-select" id="copilot_model" tabindex="0">
183
+ <div class="select-box">
184
+ <span class="selected"></span>
185
+ <span class="icon">&#9660;</span>
186
+ </div>
187
+ <div class="options">
188
+ <!-- ⭐ FREE Unlimited Models -->
189
+ <span class="option">gpt-4.1</span>
190
+ <span class="option">gpt-4o</span>
191
+ <span class="option">gpt-5-mini</span>
192
+ <span class="option">grok-code-fast-1</span>
193
+ <span class="option">oswe-vscode-prime</span>
194
+ <!-- Other Models -->
195
+ <span class="option">gpt-5</span>
196
+ <span class="option">gpt-5.1</span>
197
+ <span class="option">gpt-5.1-codex</span>
198
+ <span class="option">gpt-5.1-codex-mini</span>
199
+ <span class="option">gpt-5.1-codex-max</span>
200
+ <span class="option">gpt-5-codex</span>
201
+ <span class="option">gpt-41-copilot</span>
202
+ <span class="option">gpt-4o-mini</span>
203
+ <span class="option">gpt-4o-2024-11-20</span>
204
+ <span class="option">gpt-4</span>
205
+ <span class="option">gpt-4-0125-preview</span>
206
+ <span class="option">gpt-3.5-turbo</span>
207
+ <span class="option">claude-sonnet-4.5</span>
208
+ <span class="option">claude-sonnet-4</span>
209
+ <span class="option">claude-opus-4.5</span>
210
+ <span class="option">claude-haiku-4.5</span>
211
+ <span class="option">gemini-3-pro-preview</span>
212
+ <span class="option">gemini-2.5-pro</span>
213
+ </div>
214
+ </div>
215
+ </div>
216
+ </div>
217
+ </div>
218
+
219
+ <!-- Gemini Settings (show when Gemini selected) -->
220
+ <div id="gemini-settings" style="display: block; width: 100%;">
221
+ <div class="select-wrapper">
222
+ <label class="translator-label">Gemini API Key</label>
223
+ <input type="password" id="gemini_api_key" name="gemini_api_key"
224
+ placeholder="Nhập API key của bạn (lấy từ ai.google.dev)"
225
+ style="width: 100%; padding: 10px 14px; border: 1px solid #ddd; border-radius: 8px; font-size: 14px;">
226
+ <small style="color: #666; font-size: 12px; margin-top: 4px; display: block;">
227
+ 🔒 Key được lưu trong trình duyệt của bạn (localStorage)
228
+ </small>
229
+ </div>
230
+ </div>
231
+
232
  <!-- File upload -->
233
  <input id="file-upload" type="file" name="files" accept=".jpg, .jpeg, .png" multiple required>
234
  <label for="file-upload" class="file" id="file-label">
 
242
  <input type="hidden" id="selected_style" name="selected_style">
243
  <input type="hidden" id="selected_font" name="selected_font">
244
  <input type="hidden" id="selected_ocr" name="selected_ocr">
245
+ <input type="hidden" id="selected_copilot_model" name="selected_copilot_model">
246
  <button type="submit">Translate</button>
247
  </form>
248
+
249
+ <!-- Progress Bar -->
250
+ <div id="progress-container" style="display: none; margin-top: 20px;">
251
+ <div id="progress-phase" style="font-size: 12px; color: #666; margin-bottom: 5px; text-align: center;"></div>
252
+ <div style="background: #e0e0e0; border-radius: 10px; overflow: hidden; height: 20px;">
253
+ <div id="progress-bar"
254
+ style="height: 100%; background: linear-gradient(90deg, #5E1675, #8e44ad); width: 0%; transition: width 0.3s ease;">
255
+ </div>
256
+ </div>
257
+ <div id="progress-text" style="font-size: 13px; color: #333; margin-top: 8px; text-align: center;"></div>
258
+ </div>
259
+
260
  <img id="loading-img" src="{{ url_for('static', filename='img/loading.gif') }}" alt="">
261
  <p id="loading-p">Đang xử lý... Vui lòng đợi!</p>
262
  </div>
263
 
264
+ <!-- Socket.IO for real-time progress -->
265
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.7.2/socket.io.min.js"></script>
266
  <script src="{{ url_for('static', filename='js/app.js') }}"></script>
267
+ <script>
268
+ // Real-time progress updates
269
+ document.addEventListener('DOMContentLoaded', function () {
270
+ const socket = io();
271
+ const progressContainer = document.getElementById('progress-container');
272
+ const progressBar = document.getElementById('progress-bar');
273
+ const progressText = document.getElementById('progress-text');
274
+ const progressPhase = document.getElementById('progress-phase');
275
+
276
+ const phaseNames = {
277
+ 'detection': '🔍 Phát hiện bubbles',
278
+ 'ocr': '📖 OCR nhận dạng text',
279
+ 'translation': '🌐 Dịch văn bản',
280
+ 'rendering': '✏️ Render text vào ảnh',
281
+ 'done': '✅ Hoàn tất'
282
+ };
283
+
284
+ socket.on('progress', function (data) {
285
+ progressContainer.style.display = 'block';
286
+
287
+ const phaseName = phaseNames[data.phase] || data.phase;
288
+ progressPhase.textContent = phaseName;
289
+ progressBar.style.width = data.percent + '%';
290
+ progressText.textContent = data.message;
291
+
292
+ if (data.phase === 'done') {
293
+ progressBar.style.background = 'linear-gradient(90deg, #50C878, #2ecc71)';
294
+ }
295
+ });
296
+
297
+ // Show progress when form submitted
298
+ document.querySelector('form').addEventListener('submit', function () {
299
+ progressContainer.style.display = 'block';
300
+ progressBar.style.width = '0%';
301
+ progressBar.style.background = 'linear-gradient(90deg, #5E1675, #8e44ad)';
302
+ progressText.textContent = 'Khởi tạo...';
303
+ progressPhase.textContent = '⏳ Chuẩn bị';
304
+ });
305
+ });
306
+ </script>
307
  </body>
308
 
309
  </html>
templates/translate.html CHANGED
@@ -36,10 +36,15 @@
36
  </div>
37
 
38
  <div class="buttons_image">
39
- <a href="#" class="green" id="download-all">📦 Download All</a>
40
  <a href="/" class="red">← Quay lại</a>
41
  </div>
42
 
 
 
 
 
 
43
  </body>
44
  <script>
45
  // Download single image
@@ -55,14 +60,22 @@
55
  });
56
  });
57
 
58
- // Download all images
59
- document.getElementById('download-all').addEventListener('click', (e) => {
60
  e.preventDefault();
61
- document.querySelectorAll('.download-btn').forEach((btn, index) => {
62
- setTimeout(() => {
63
- btn.click();
64
- }, index * 300); // Delay between downloads
 
 
 
 
65
  });
 
 
 
 
66
  });
67
  </script>
68
 
 
36
  </div>
37
 
38
  <div class="buttons_image">
39
+ <a href="#" class="green" id="download-zip">📦 Download ZIP</a>
40
  <a href="/" class="red">← Quay lại</a>
41
  </div>
42
 
43
+ <!-- Hidden form for ZIP download -->
44
+ <form id="zip-form" action="/download-zip" method="POST" style="display: none;">
45
+ <input type="hidden" name="images_data" id="images-data">
46
+ </form>
47
+
48
  </body>
49
  <script>
50
  // Download single image
 
60
  });
61
  });
62
 
63
+ // Download all images as ZIP
64
+ document.getElementById('download-zip').addEventListener('click', (e) => {
65
  e.preventDefault();
66
+
67
+ // Collect all images data
68
+ const images = [];
69
+ document.querySelectorAll('.download-btn').forEach(btn => {
70
+ images.push({
71
+ name: btn.getAttribute('data-name'),
72
+ data: btn.getAttribute('data-image')
73
+ });
74
  });
75
+
76
+ // Submit form with images data
77
+ document.getElementById('images-data').value = JSON.stringify(images);
78
+ document.getElementById('zip-form').submit();
79
  });
80
  </script>
81
 
translator/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (345 Bytes). View file
 
translator/__pycache__/copilot_translator.cpython-311.pyc ADDED
Binary file (16.2 kB). View file
 
translator/__pycache__/gemini_translator.cpython-311.pyc CHANGED
Binary files a/translator/__pycache__/gemini_translator.cpython-311.pyc and b/translator/__pycache__/gemini_translator.cpython-311.pyc differ
 
translator/__pycache__/translator.cpython-311.pyc CHANGED
Binary files a/translator/__pycache__/translator.cpython-311.pyc and b/translator/__pycache__/translator.cpython-311.pyc differ
 
translator/copilot_translator.py ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copilot API Translator
3
+ Uses copilot-api proxy server (OpenAI-compatible endpoint)
4
+ https://github.com/ericc-ch/copilot-api
5
+ """
6
+ import requests
7
+ import json
8
+ from typing import List
9
+
10
+
11
class CopilotTranslator:
    """
    Translator using Copilot API proxy server.

    Communicates via the OpenAI-compatible /v1/chat/completions endpoint
    (https://github.com/ericc-ch/copilot-api). All translate_* methods are
    best-effort: on API failure they return the original text(s) instead of
    raising, so a translation pipeline never crashes mid-run.
    """

    # Human-readable language names used inside prompts; unknown codes fall
    # back to Japanese/English in the individual methods.
    LANG_NAMES = {
        "ja": "Japanese",
        "zh": "Chinese",
        "ko": "Korean",
        "en": "English",
        "vi": "Vietnamese",
        "th": "Thai",
        "id": "Indonesian",
        "fr": "French",
        "de": "German",
        "es": "Spanish",
        "ru": "Russian"
    }

    # Available models (from Copilot API). Used as a static fallback when the
    # server's /v1/models endpoint cannot be reached.
    MODELS = [
        # GPT-5 Series
        "gpt-5",
        "gpt-5-mini",
        "gpt-5.1",
        "gpt-5.1-codex",
        "gpt-5.1-codex-mini",
        "gpt-5.1-codex-max",
        "gpt-5-codex",
        # GPT-4.1 Series
        "gpt-4.1",
        "gpt-41-copilot",
        # GPT-4o Series
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4o-2024-11-20",
        # GPT-4 Series
        "gpt-4",
        "gpt-4-0125-preview",
        # GPT-3.5
        "gpt-3.5-turbo",
        # Claude Series
        "claude-sonnet-4.5",
        "claude-sonnet-4",
        "claude-opus-4.5",
        "claude-haiku-4.5",
        # Gemini
        "gemini-3-pro-preview",
        "gemini-2.5-pro",
        # Other
        "grok-code-fast-1",
    ]

    def __init__(self, server_url: str = "http://localhost:8080", model: str = "gpt-4o"):
        """
        Initialize Copilot translator.

        Args:
            server_url: Copilot API proxy server URL (e.g., http://localhost:8080)
            model: Model to use (e.g., gpt-4o, claude-3.5-sonnet)
        """
        self.base_url = server_url.rstrip("/")
        self.model = model
        self.endpoint = f"{self.base_url}/v1/chat/completions"

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """
        Remove a surrounding markdown code fence from a model reply.

        LLMs frequently wrap JSON answers in ```json ... ``` fences even when
        told not to; normalize the reply before handing it to json.loads().
        """
        if text.startswith("```json"):
            text = text[7:]
        if text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def _chat(self, prompt: str, timeout: int) -> str:
        """
        Send a single-user-message chat completion and return the reply text.

        Raises requests/HTTP/parse errors on failure; callers implement their
        own fallback behavior.
        """
        response = requests.post(
            self.endpoint,
            json={
                "model": self.model,
                "messages": [{"role": "user", "content": prompt}],
                # Low temperature: translations should be stable, not creative.
                "temperature": 0.3,
            },
            timeout=timeout
        )
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"].strip()

    def translate_single(self, text: str, source: str = "ja", target: str = "en") -> str:
        """Translate a single text string; returns the input unchanged on error."""
        if not text or not text.strip():
            return text

        source_name = self.LANG_NAMES.get(source, "Japanese")
        target_name = self.LANG_NAMES.get(target, "English")

        prompt = f"""You are an expert manga/comic translator. Translate the following {source_name} text to {target_name}.

Rules:
- Translate for SPOKEN dialogue, natural when read aloud
- Preserve tone, emotion, and personality
- For Vietnamese: use appropriate pronouns based on context
- Return ONLY the translated text, nothing else

Text: {text}"""

        try:
            return self._chat(prompt, timeout=30)
        except Exception as e:
            # Best-effort: keep the original text so the pipeline continues.
            print(f"Copilot translation error: {e}")
            return text

    def translate_batch(self, texts: List[str], source: str = "ja", target: str = "en") -> List[str]:
        """
        Translate multiple texts in a single API call.

        Args:
            texts: List of texts to translate
            source: Source language code
            target: Target language code

        Returns:
            List of translated texts (same order). Empty/blank entries are
            preserved untouched; on failure falls back to per-text calls.
        """
        if not texts:
            return []

        # Filter empty texts but remember their positions so the output list
        # lines up with the input.
        indexed_texts = [(i, t) for i, t in enumerate(texts) if t and t.strip()]
        if not indexed_texts:
            return texts

        texts_to_translate = [t for _, t in indexed_texts]

        source_name = self.LANG_NAMES.get(source, "Japanese")
        target_name = self.LANG_NAMES.get(target, "English")

        prompt = f"""You are an expert manga/comic translator. Translate the following {source_name} texts to {target_name}.

Rules:
- These are speech bubble texts from the SAME comic page - maintain consistency
- Translate for SPOKEN dialogue, natural when read aloud
- Preserve tone, emotion, and personality
- For Vietnamese: use appropriate pronouns based on context
- Keep short lines impactful

Input (JSON array of texts):
{json.dumps(texts_to_translate, ensure_ascii=False)}

Return ONLY a JSON array with translated texts in the EXACT same order.
Example: ["translation 1", "translation 2"]"""

        try:
            result_text = self._strip_code_fences(self._chat(prompt, timeout=60))
            translations = json.loads(result_text)

            # Validate length: pad with originals / truncate so positions
            # still line up with the input list.
            if len(translations) != len(texts_to_translate):
                print(f"Warning: Expected {len(texts_to_translate)} translations, got {len(translations)}")
                while len(translations) < len(texts_to_translate):
                    translations.append(texts_to_translate[len(translations)])
                translations = translations[:len(texts_to_translate)]

            # Rebuild the full list, keeping blank entries in place.
            result_list = list(texts)
            for (orig_idx, _), trans in zip(indexed_texts, translations):
                result_list[orig_idx] = trans

            return result_list

        except Exception as e:
            print(f"Copilot batch translation error: {e}")
            # Fallback to single translations
            return [self.translate_single(t, source, target) for t in texts]

    def translate_pages_batch(
        self,
        pages_texts: dict,
        source: str = "ja",
        target: str = "en",
        context: dict = None
    ) -> dict:
        """
        Translate texts from multiple pages in a single API call.
        Ideal for batch processing 10+ manga pages at once.

        Args:
            pages_texts: Dict mapping page names to list of texts
                         e.g., {"page1": ["text1", "text2"], "page2": ["text3"]}
            source: Source language code
            target: Target language code
            context: Optional dict of ALL page texts for context (helps maintain consistency)

        Returns:
            Dict with same structure but translated texts
        """
        if not pages_texts:
            return {}

        source_name = self.LANG_NAMES.get(source, "Japanese")
        target_name = self.LANG_NAMES.get(target, "English")

        # Build context section if context is provided (pages outside this
        # batch give the model the surrounding story for consistency).
        context_section = ""
        if context and context != pages_texts:
            other_pages = {k: v for k, v in context.items() if k not in pages_texts}
            if other_pages:
                context_preview = []
                for page, texts in list(other_pages.items())[:5]:  # First 5 pages for context
                    context_preview.append(f"{page}: {' | '.join(texts[:3])}...")
                context_section = f"""
STORY CONTEXT (from other pages in this batch - use for character/tone consistency):
{chr(10).join(context_preview)}
---
"""

        prompt = f"""You are an expert manga/comic translator. Translate the following {source_name} texts to {target_name}.
{context_section}
Context: These are SEQUENTIAL comic pages telling a continuous story. Maintain narrative flow and character voice consistency across all pages.

Rules:
- Translate for SPOKEN dialogue - it must sound natural when read aloud
- Each character should have a consistent voice/speaking style across pages
- Preserve tone, emotion, and personality through careful word choice
- For Vietnamese: Choose appropriate pronouns based on character relationships
- Keep short lines impactful. Don't pad or over-explain.

Input (JSON - sequential pages with their speech bubbles):
{json.dumps(pages_texts, ensure_ascii=False, indent=2)}

IMPORTANT: Return ONLY a valid JSON object with the exact same structure but with translated texts.
Keep page names and bubble order exactly the same. No explanations or markdown."""

        try:
            # Longer timeout for multi-page batch.
            result_text = self._strip_code_fences(self._chat(prompt, timeout=120))
            translated = json.loads(result_text)
            print(f"✓ Translated {len(pages_texts)} pages in single batch")
            return translated

        except Exception as e:
            print(f"Copilot pages batch translation error: {e}")
            # Fallback: translate each page separately
            result = {}
            for page_name, texts in pages_texts.items():
                result[page_name] = self.translate_batch(texts, source, target)
            return result

    def test_connection(self) -> bool:
        """Test if the server is reachable."""
        try:
            response = requests.get(f"{self.base_url}/v1/models", timeout=5)
            return response.status_code == 200
        except requests.RequestException:
            # Narrow except: a bare `except:` would also swallow KeyboardInterrupt.
            return False

    def get_available_models(self) -> List[str]:
        """Get list of available models from server, or the static default list."""
        try:
            response = requests.get(f"{self.base_url}/v1/models", timeout=5)
            if response.status_code == 200:
                data = response.json()
                return [m["id"] for m in data.get("data", [])]
        except (requests.RequestException, ValueError, KeyError):
            # ValueError covers invalid JSON; KeyError covers a malformed entry.
            pass
        return self.MODELS  # Return default list
+
310
+
311
+ def translate_manga_pages_batch(
312
+ pages_texts: dict,
313
+ server_url: str = "http://localhost:8080",
314
+ model: str = "gpt-4o",
315
+ source_lang: str = "ja",
316
+ target_lang: str = "en",
317
+ batch_size: int = 10
318
+ ) -> dict:
319
+ """
320
+ Translate manga pages in batches.
321
+
322
+ Args:
323
+ pages_texts: All pages' texts {page_name: [texts]}
324
+ server_url: Copilot API server URL
325
+ model: Model to use
326
+ source_lang: Source language code
327
+ target_lang: Target language code
328
+ batch_size: Number of pages per API call (default: 10)
329
+
330
+ Returns:
331
+ All translated texts
332
+ """
333
+ translator = CopilotTranslator(server_url=server_url, model=model)
334
+
335
+ page_names = list(pages_texts.keys())
336
+ all_results = {}
337
+
338
+ # Process in batches
339
+ for i in range(0, len(page_names), batch_size):
340
+ batch_pages = page_names[i:i + batch_size]
341
+ batch_texts = {name: pages_texts[name] for name in batch_pages}
342
+
343
+ print(f"Translating pages {i+1} to {min(i+batch_size, len(page_names))}...")
344
+ batch_results = translator.translate_pages_batch(
345
+ batch_texts,
346
+ source=source_lang,
347
+ target=target_lang
348
+ )
349
+ all_results.update(batch_results)
350
+
351
+ return all_results
translator/gemini_translator.py CHANGED
@@ -6,8 +6,13 @@ Supports multiple source languages and custom prompts
6
  import google.generativeai as genai
7
  import json
8
  import os
 
9
  from typing import List, Dict, Optional
10
 
 
 
 
 
11
 
12
  class GeminiTranslator:
13
  """
@@ -32,13 +37,13 @@ class GeminiTranslator:
32
  # Preset style templates
33
  STYLE_PRESETS = {
34
  "default": "",
35
- "formal": "Use formal language and polite expressions.",
36
- "casual": "Use casual, friendly language like talking to friends.",
37
- "keep_honorifics": "Keep Japanese honorifics like -san, -kun, -chan, -sama, senpai, sensei.",
38
- "localize": "Fully localize the text, replace cultural references with equivalent ones in target language.",
39
- "literal": "Translate as literally as possible while maintaining readability.",
40
- "web_novel": "Use web novel translation style with dramatic expressions.",
41
- "action": "Use punchy, short sentences suitable for action scenes.",
42
  }
43
 
44
  def __init__(self, api_key: str = None, custom_prompt: str = None, style: str = "default"):
@@ -97,11 +102,19 @@ class GeminiTranslator:
97
  style = custom_prompt or self.custom_prompt
98
  style_text = f"\nStyle: {style}" if style else ""
99
 
100
- prompt = f"""Translate the following {source_name} comic/manga text to {target_name}.
101
- Keep the translation natural and suitable for comic dialogue.{style_text}
102
- Only return the translated text, nothing else.
 
 
 
 
 
 
 
 
103
 
104
- Text: {text}"""
105
 
106
  try:
107
  response = self.model.generate_content(prompt)
@@ -118,7 +131,7 @@ Text: {text}"""
118
  custom_prompt: str = None
119
  ) -> List[str]:
120
  """
121
- Translate multiple texts in a single API call.
122
 
123
  Args:
124
  texts: List of texts to translate
@@ -138,55 +151,101 @@ Text: {text}"""
138
  if not indexed_texts:
139
  return texts
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  source_name = self.LANG_NAMES.get(source, "Japanese")
142
  target_name = self.LANG_NAMES.get(target, "English")
143
- texts_to_translate = [t for _, t in indexed_texts]
144
 
145
  style = custom_prompt or self.custom_prompt
146
  style_text = f"\nStyle instructions: {style}" if style else ""
147
 
148
- prompt = f"""You are a professional comic/manga translator. Translate the following {source_name} texts to {target_name}.
149
- Keep translations natural and suitable for comic speech bubbles.{style_text}
150
 
151
- Input texts (JSON array):
 
 
 
 
 
 
 
 
 
 
 
 
152
  {json.dumps(texts_to_translate, ensure_ascii=False)}
153
 
154
- IMPORTANT: Return ONLY a JSON array with translated texts in the same order. No explanations.
155
- Example output format: ["translated text 1", "translated text 2", ...]"""
156
 
157
- try:
158
- response = self.model.generate_content(prompt)
159
- result_text = response.text.strip()
160
-
161
- # Clean up response if needed
162
- if result_text.startswith("```json"):
163
- result_text = result_text[7:]
164
- if result_text.startswith("```"):
165
- result_text = result_text[3:]
166
- if result_text.endswith("```"):
167
- result_text = result_text[:-3]
168
- result_text = result_text.strip()
169
-
170
- translations = json.loads(result_text)
171
-
172
- # Rebuild full list with original empty strings preserved
173
- result = list(texts)
174
- for (orig_idx, _), trans in zip(indexed_texts, translations):
175
- result[orig_idx] = trans
176
 
177
- return result
178
-
179
- except Exception as e:
180
- print(f"Gemini batch translation error: {e}")
181
- # Fallback to single translations
182
- return [self.translate_single(t, source, target) for t in texts]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  def translate_pages_batch(
185
  self,
186
  pages_texts: Dict[str, List[str]],
187
  source: str = "ja",
188
  target: str = "en",
189
- custom_prompt: str = None
 
190
  ) -> Dict[str, List[str]]:
191
  """
192
  Translate texts from multiple pages in a single API call.
@@ -197,6 +256,7 @@ Example output format: ["translated text 1", "translated text 2", ...]"""
197
  source: Source language code
198
  target: Target language code
199
  custom_prompt: Override custom prompt for this call
 
200
 
201
  Returns:
202
  Dict with same structure but translated texts
@@ -210,15 +270,41 @@ Example output format: ["translated text 1", "translated text 2", ...]"""
210
  style = custom_prompt or self.custom_prompt
211
  style_text = f"\nStyle instructions: {style}" if style else ""
212
 
213
- prompt = f"""You are a professional comic/manga translator. Translate all {source_name} texts to {target_name}.
214
- Keep translations natural, conversational, and suitable for comic speech bubbles.
215
- Maintain the context and flow between pages as they are sequential comic pages.{style_text}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- Input (JSON - page names with their text bubbles):
218
  {json.dumps(pages_texts, ensure_ascii=False, indent=2)}
219
 
220
- IMPORTANT: Return ONLY a JSON object with the exact same structure but with translated texts.
221
- Keep the same page names and order. No explanations or markdown."""
222
 
223
  try:
224
  response = self.model.generate_content(prompt)
 
6
  import google.generativeai as genai
7
  import json
8
  import os
9
+ import time
10
  from typing import List, Dict, Optional
11
 
12
+ # Constants for retry logic
13
+ MAX_RETRIES = 3
14
+ RETRY_DELAY_BASE = 0.5 # Faster recovery: 0.5s → 1s → 2s
15
+
16
 
17
  class GeminiTranslator:
18
  """
 
37
  # Preset style templates
38
  STYLE_PRESETS = {
39
  "default": "",
40
+ "formal": "Use formal, polite language. Use respectful pronouns and expressions.",
41
+ "casual": "Use casual, natural everyday language. Like friends talking to each other.",
42
+ "keep_honorifics": "Keep Japanese honorifics like -san, -kun, -chan, -sama, senpai, sensei untranslated.",
43
+ "localize": "Fully localize cultural references. Adapt idioms and expressions to feel native.",
44
+ "literal": "Translate meaning accurately but ensure it still sounds natural when spoken.",
45
+ "web_novel": "Use dramatic web novel style with impactful expressions and emotional weight.",
46
+ "action": "Use short, punchy sentences. Quick pace. Impactful dialogue.",
47
  }
48
 
49
  def __init__(self, api_key: str = None, custom_prompt: str = None, style: str = "default"):
 
102
  style = custom_prompt or self.custom_prompt
103
  style_text = f"\nStyle: {style}" if style else ""
104
 
105
+ prompt = f"""You are an expert manga/comic translator specializing in {source_name} to {target_name} translation.
106
+
107
+ Translation Guidelines:
108
+ - Translate for SPOKEN dialogue, not written text. It should sound natural when read aloud.
109
+ - Preserve the character's tone, emotion, and personality through word choice.
110
+ - Use natural sentence structures in {target_name}. Avoid awkward literal translations.
111
+ - For Vietnamese: Use appropriate pronouns (tao/mày for close friends, tôi/anh/em for normal, etc.) based on context.
112
+ - Keep exclamations and emotional expressions feeling authentic.
113
+ - Maintain the impact and rhythm of short/punchy lines.{style_text}
114
+
115
+ IMPORTANT: Return ONLY the translated text. No explanations, no quotes, no formatting.
116
 
117
+ Original text: {text}"""
118
 
119
  try:
120
  response = self.model.generate_content(prompt)
 
131
  custom_prompt: str = None
132
  ) -> List[str]:
133
  """
134
+ Translate multiple texts in a single API call with retry logic.
135
 
136
  Args:
137
  texts: List of texts to translate
 
151
  if not indexed_texts:
152
  return texts
153
 
154
+ texts_to_translate = [t for _, t in indexed_texts]
155
+ translations = self._translate_batch_internal(texts_to_translate, source, target, custom_prompt)
156
+
157
+ # Rebuild full list with original empty strings preserved
158
+ result = list(texts)
159
+ for (orig_idx, _), trans in zip(indexed_texts, translations):
160
+ result[orig_idx] = trans
161
+
162
+ return result
163
+
164
+ def _translate_batch_internal(
165
+ self,
166
+ texts_to_translate: List[str],
167
+ source: str,
168
+ target: str,
169
+ custom_prompt: str = None
170
+ ) -> List[str]:
171
+ """Internal method to translate a single chunk with retry logic."""
172
  source_name = self.LANG_NAMES.get(source, "Japanese")
173
  target_name = self.LANG_NAMES.get(target, "English")
 
174
 
175
  style = custom_prompt or self.custom_prompt
176
  style_text = f"\nStyle instructions: {style}" if style else ""
177
 
178
+ prompt = f"""You are an expert manga/comic translator with years of experience in {source_name} to {target_name} translation.
 
179
 
180
+ Translation Guidelines:
181
+ - These are speech bubble texts from the SAME comic page - maintain consistency in character voices.
182
+ - Translate for SPOKEN dialogue. It must sound natural when read aloud, not stiff or robotic.
183
+ - Preserve each character's tone, emotion, and personality through appropriate word choice.
184
+ - Use natural {target_name} sentence structures. AVOID awkward literal word-for-word translations.
185
+ - For Vietnamese specifically:
186
+ + Use appropriate pronouns based on relationship (tao/mày, tôi/cậu, anh/em, etc.)
187
+ + Translate exclamations naturally (くそ → Chết tiệt, やばい → Chết rồi, etc.)
188
+ + Keep dialogue feeling authentic to how Vietnamese people actually speak
189
+ - Maintain the impact of short/punchy lines. Don't over-explain.
190
+ - Keep emotional expressions and interjections feeling authentic.{style_text}
191
+
192
+ Input texts (JSON array - each is a separate speech bubble):
193
  {json.dumps(texts_to_translate, ensure_ascii=False)}
194
 
195
+ IMPORTANT: Return ONLY a valid JSON array with translated texts in the EXACT same order.
196
+ Format: ["translation 1", "translation 2", ...]"""
197
 
198
+ # Retry with exponential backoff
199
+ for attempt in range(MAX_RETRIES):
200
+ try:
201
+ response = self.model.generate_content(prompt)
202
+ result_text = response.text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
+ # Clean up response if needed
205
+ if result_text.startswith("```json"):
206
+ result_text = result_text[7:]
207
+ if result_text.startswith("```"):
208
+ result_text = result_text[3:]
209
+ if result_text.endswith("```"):
210
+ result_text = result_text[:-3]
211
+ result_text = result_text.strip()
212
+
213
+ translations = json.loads(result_text)
214
+
215
+ # Validate response length
216
+ if len(translations) != len(texts_to_translate):
217
+ raise ValueError(f"Expected {len(texts_to_translate)} translations, got {len(translations)}")
218
+
219
+ return translations
220
+
221
+ except Exception as e:
222
+ error_str = str(e)
223
+ print(f"Gemini batch attempt {attempt + 1}/{MAX_RETRIES} failed: {e}")
224
+
225
+ # Check if it's a quota error - don't retry or fallback
226
+ if "429" in error_str or "quota" in error_str.lower():
227
+ print("⚠️ Quota exceeded! Returning original texts to avoid more API calls.")
228
+ print(" Wait 1 minute or upgrade your Gemini API plan.")
229
+ return texts_to_translate # Return original texts
230
+
231
+ if attempt < MAX_RETRIES - 1:
232
+ delay = RETRY_DELAY_BASE * (2 ** attempt)
233
+ print(f"Retrying in {delay}s...")
234
+ time.sleep(delay)
235
+ else:
236
+ # Only fallback to single translations if NOT quota error
237
+ print("All retries failed, falling back to single translations")
238
+ return [self.translate_single(t, source, target) for t in texts_to_translate]
239
+
240
+ return texts_to_translate # Fallback: return original
241
 
242
  def translate_pages_batch(
243
  self,
244
  pages_texts: Dict[str, List[str]],
245
  source: str = "ja",
246
  target: str = "en",
247
+ custom_prompt: str = None,
248
+ context: Dict[str, List[str]] = None
249
  ) -> Dict[str, List[str]]:
250
  """
251
  Translate texts from multiple pages in a single API call.
 
256
  source: Source language code
257
  target: Target language code
258
  custom_prompt: Override custom prompt for this call
259
+ context: Optional dict of ALL page texts for context (helps maintain consistency)
260
 
261
  Returns:
262
  Dict with same structure but translated texts
 
270
  style = custom_prompt or self.custom_prompt
271
  style_text = f"\nStyle instructions: {style}" if style else ""
272
 
273
+ # Build context section if context is provided
274
+ context_section = ""
275
+ if context and context != pages_texts:
276
+ other_pages = {k: v for k, v in context.items() if k not in pages_texts}
277
+ if other_pages:
278
+ context_preview = []
279
+ for page, texts in list(other_pages.items())[:5]:
280
+ context_preview.append(f"{page}: {' | '.join(texts[:3])}...")
281
+ context_section = f"""
282
+ STORY CONTEXT (from other pages - use for character/tone consistency):
283
+ {chr(10).join(context_preview)}
284
+ ---
285
+ """
286
+
287
+ prompt = f"""You are an expert manga/comic translator with deep understanding of {source_name} to {target_name} translation.
288
+ {context_section}
289
+ Context: These are SEQUENTIAL comic pages telling a continuous story. Maintain narrative flow and character voice consistency across all pages.
290
+
291
+ Translation Guidelines:
292
+ - Translate for SPOKEN dialogue - it must sound natural when read aloud.
293
+ - Each character should have a consistent voice/speaking style across pages.
294
+ - Preserve tone, emotion, and personality through careful word choice.
295
+ - Use natural {target_name} sentence structures. NEVER translate word-for-word literally.
296
+ - For Vietnamese:
297
+ + Choose appropriate pronouns based on character relationships and social context
298
+ + Translate interjections and exclamations to feel authentic (not literal)
299
+ + Use natural Vietnamese speech patterns, not textbook Vietnamese
300
+ - Keep short lines impactful. Don't pad or over-explain.
301
+ - Sound effects and onomatopoeia: translate the meaning/feeling, not literally.{style_text}
302
 
303
+ Input (JSON - sequential pages with their speech bubbles):
304
  {json.dumps(pages_texts, ensure_ascii=False, indent=2)}
305
 
306
+ IMPORTANT: Return ONLY a valid JSON object with the exact same structure but with translated texts.
307
+ Keep page names and bubble order exactly the same. No explanations or markdown."""
308
 
309
  try:
310
  response = self.model.generate_content(prompt)
translator/translator.py CHANGED
@@ -150,7 +150,9 @@ class MangaTranslator:
150
  try:
151
  if self._gemini_translator is None:
152
  from .gemini_translator import GeminiTranslator
153
- api_key = self.gemini_api_key or "AIzaSyAplFKOKBEcQku5m6gPEBMlZMGc4sI5rgo"
 
 
154
  custom_prompt = getattr(self, '_gemini_custom_prompt', None)
155
  self._gemini_translator = GeminiTranslator(
156
  api_key=api_key,
 
150
  try:
151
  if self._gemini_translator is None:
152
  from .gemini_translator import GeminiTranslator
153
+ api_key = getattr(self, '_gemini_api_key', None) or self.gemini_api_key
154
+ if not api_key:
155
+ raise ValueError("Gemini API key required. Please enter it in the web form.")
156
  custom_prompt = getattr(self, '_gemini_custom_prompt', None)
157
  self._gemini_translator = GeminiTranslator(
158
  api_key=api_key,