Upload 83 files
Browse files- .gitattributes +13 -0
- README.md +61 -16
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/font_analyzer.cpython-311.pyc +0 -0
- __pycache__/process_bubble.cpython-311.pyc +0 -0
- app.py +491 -36
- font_analyzer.py +151 -0
- fonts/Yuki-Arenzi.ttf +0 -0
- fonts/Yuki-Burobu.ttf +3 -0
- fonts/Yuki-CCMarianChurchlandJournal.ttf +3 -0
- fonts/Yuki-CDX Starstreak.ttf +3 -0
- fonts/Yuki-CHICKEN Pie.ttf +3 -0
- fonts/Yuki-CrashLanding BB.ttf +0 -0
- fonts/Yuki-Downhill Dive.ttf +3 -0
- fonts/Yuki-Gingerline DEMO Regular.ttf +0 -0
- fonts/Yuki-Gorrilaz_Story.ttf +3 -0
- fonts/Yuki-KG Only Angel.ttf +3 -0
- fonts/Yuki-LF SwandsHand.ttf +0 -0
- fonts/Yuki-La Belle Aurore.ttf +0 -0
- fonts/Yuki-Little Cupcakes.ttf +3 -0
- fonts/Yuki-Nagurigaki Crayon.ttf +3 -0
- fonts/Yuki-Ripsnort BB.ttf +3 -0
- fonts/Yuki-Roasthink.ttf +0 -0
- fonts/Yuki-Screwball.ttf +0 -0
- fonts/Yuki-Shark Crash.ttf +3 -0
- fonts/Yuki-Skulduggery.ttf +3 -0
- fonts/Yuki-Superscratchy.ttf +0 -0
- fonts/Yuki-Tea And Oranges Regular.ttf +3 -0
- ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc +0 -0
- ocr/chrome_lens_ocr.py +58 -0
- process_bubble.py +12 -1
- static/css/style.css +50 -0
- static/js/app.js +55 -0
- templates/index.html +148 -0
- templates/translate.html +20 -7
- translator/__pycache__/__init__.cpython-311.pyc +0 -0
- translator/__pycache__/copilot_translator.cpython-311.pyc +0 -0
- translator/__pycache__/gemini_translator.cpython-311.pyc +0 -0
- translator/__pycache__/translator.cpython-311.pyc +0 -0
- translator/copilot_translator.py +351 -0
- translator/gemini_translator.py +136 -50
- translator/translator.py +3 -1
.gitattributes
CHANGED
|
@@ -47,3 +47,16 @@ examples/ex3.png filter=lfs diff=lfs merge=lfs -text
|
|
| 47 |
fonts/ariali.ttf filter=lfs diff=lfs merge=lfs -text
|
| 48 |
static/img/loading.gif filter=lfs diff=lfs merge=lfs -text
|
| 49 |
static/img/back.jpg filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
fonts/ariali.ttf filter=lfs diff=lfs merge=lfs -text
|
| 48 |
static/img/loading.gif filter=lfs diff=lfs merge=lfs -text
|
| 49 |
static/img/back.jpg filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
fonts/Yuki-Burobu.ttf filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
fonts/Yuki-CCMarianChurchlandJournal.ttf filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
fonts/Yuki-CDX[[:space:]]Starstreak.ttf filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
fonts/Yuki-CHICKEN[[:space:]]Pie.ttf filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
fonts/Yuki-Downhill[[:space:]]Dive.ttf filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
fonts/Yuki-Gorrilaz_Story.ttf filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
fonts/Yuki-KG[[:space:]]Only[[:space:]]Angel.ttf filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
fonts/Yuki-Little[[:space:]]Cupcakes.ttf filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
fonts/Yuki-Nagurigaki[[:space:]]Crayon.ttf filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
fonts/Yuki-Ripsnort[[:space:]]BB.ttf filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
fonts/Yuki-Shark[[:space:]]Crash.ttf filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
fonts/Yuki-Skulduggery.ttf filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
fonts/Yuki-Tea[[:space:]]And[[:space:]]Oranges[[:space:]]Regular.ttf filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -7,24 +7,69 @@ sdk: docker
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
---
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
-
##
|
| 16 |
-
- 🔍 YOLO-based bubble detection
|
| 17 |
-
- 📝 Multiple OCR engines
|
| 18 |
-
- 🌐 Multiple translators
|
| 19 |
-
- 📏 Smart handling for long webtoon images
|
| 20 |
-
- 🎨 Custom fonts support
|
| 21 |
|
| 22 |
-
##
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
4. Click Translate!
|
| 27 |
|
| 28 |
-
##
|
| 29 |
-
-
|
| 30 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
---
|
| 10 |
+
# Manga Translator 📚
|
| 11 |
|
| 12 |
+
Dịch tự động speech bubbles trong manga/manhwa/manhua!
|
| 13 |
|
| 14 |
+
## ✨ Features
|
| 15 |
|
| 16 |
+
### Core
|
| 17 |
+
- 🔍 **YOLO-based bubble detection** - Phát hiện speech bubble tự động
|
| 18 |
+
- 📝 **Multiple OCR engines** - Manga-OCR, Chrome Lens (batch support)
|
| 19 |
+
- 🌐 **Multiple translators** - Gemini, Copilot API, NLLB, Opus-MT
|
|
|
|
|
|
|
| 20 |
|
| 21 |
+
### Translation
|
| 22 |
+
- 🧠 **Context Memory** - Sử dụng context từ tất cả ảnh để dịch chính xác hơn
|
| 23 |
+
- 🎯 **Multi-page batch translation** - Dịch 10 pages/API call tiết kiệm quota
|
| 24 |
+
- 🎨 **Translation styles** - Default, Casual, Formal, Keep Honorifics, Web Novel...
|
|
|
|
| 25 |
|
| 26 |
+
### UI/UX
|
| 27 |
+
- 📊 **Real-time progress** - Progress bar hiển thị tiến độ theo từng phase
|
| 28 |
+
- 📦 **Download ZIP** - Tải tất cả ảnh đã dịch dưới dạng ZIP
|
| 29 |
+
- 🔤 **Auto font sizing** - Tự động điều chỉnh cỡ chữ theo bubble
|
| 30 |
+
- 📏 **24+ fonts** - Yuki fonts, AnimeAce, và nhiều font khác
|
| 31 |
+
|
| 32 |
+
## 🚀 Usage
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
# Install dependencies
|
| 36 |
+
pip install -r requirements.txt
|
| 37 |
+
|
| 38 |
+
# Run
|
| 39 |
+
python app.py
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
Mở http://localhost:5000
|
| 43 |
+
|
| 44 |
+
## 📋 Workflow
|
| 45 |
+
|
| 46 |
+
1. Upload manga/manhwa images
|
| 47 |
+
2. Chọn ngôn ngữ gốc (Japanese/Chinese/Korean/English)
|
| 48 |
+
3. Chọn ngôn ngữ đích (Vietnamese, English, ...)
|
| 49 |
+
4. Chọn translator (Gemini/Copilot) và OCR engine
|
| 50 |
+
5. Check "Context Memory" để dịch chính xác hơn
|
| 51 |
+
6. Click **Translate**!
|
| 52 |
+
7. Xem progress bar real-time
|
| 53 |
+
8. Download từng ảnh hoặc **Download ZIP**
|
| 54 |
+
|
| 55 |
+
## 🌍 Supported Languages
|
| 56 |
+
|
| 57 |
+
| Source | Target |
|
| 58 |
+
|--------|--------|
|
| 59 |
+
| Japanese (Manga) | Vietnamese |
|
| 60 |
+
| Chinese (Manhua) | English |
|
| 61 |
+
| Korean (Manhwa) | Chinese |
|
| 62 |
+
| English (Comic) | Korean, Thai, Indonesian, French, German, Spanish, Russian |
|
| 63 |
+
|
| 64 |
+
## 📡 API Keys
|
| 65 |
+
|
| 66 |
+
- **Gemini**: Nhập API key từ [ai.google.dev](https://ai.google.dev)
|
| 67 |
+
- **Copilot**: Chạy server [copilot-api](https://github.com/copilot-api) local
|
| 68 |
+
|
| 69 |
+
## 🔧 Tech Stack
|
| 70 |
+
|
| 71 |
+
- Flask + Flask-SocketIO (real-time WebSocket)
|
| 72 |
+
- YOLOv8 (bubble detection)
|
| 73 |
+
- Manga-OCR / Chrome-Lens (OCR)
|
| 74 |
+
- Gemini / Copilot API (translation)
|
| 75 |
+
- PIL (text rendering)
|
__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
|
__pycache__/font_analyzer.cpython-311.pyc
ADDED
|
Binary file (9.07 kB). View file
|
|
|
__pycache__/process_bubble.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/process_bubble.cpython-311.pyc and b/__pycache__/process_bubble.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -1,4 +1,8 @@
|
|
| 1 |
-
from flask import Flask, render_template, request, redirect
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from detect_bubbles import detect_bubbles
|
| 3 |
from process_bubble import process_bubble
|
| 4 |
from translator.translator import MangaTranslator
|
|
@@ -14,7 +18,10 @@ import os
|
|
| 14 |
|
| 15 |
app = Flask(__name__)
|
| 16 |
app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", "secret_key")
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
MODEL_PATH = "model/model.pt"
|
| 20 |
|
|
@@ -24,10 +31,11 @@ def home():
|
|
| 24 |
return render_template("index.html")
|
| 25 |
|
| 26 |
|
| 27 |
-
def process_single_image(image, manga_translator, mocr, selected_translator, selected_font):
|
| 28 |
"""Process a single image and return the translated version.
|
| 29 |
|
| 30 |
Optimized with batch translation for Gemini to reduce API calls.
|
|
|
|
| 31 |
"""
|
| 32 |
results = detect_bubbles(MODEL_PATH, image)
|
| 33 |
|
|
@@ -37,11 +45,16 @@ def process_single_image(image, manga_translator, mocr, selected_translator, sel
|
|
| 37 |
# Phase 1: Collect all bubble data and OCR texts
|
| 38 |
bubble_data = []
|
| 39 |
texts_to_translate = []
|
|
|
|
| 40 |
|
| 41 |
for result in results:
|
| 42 |
x1, y1, x2, y2, score, class_id = result
|
| 43 |
detected_image = image[int(y1):int(y2), int(x1):int(x2)]
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Fix: detected_image is already uint8, no need to multiply by 255
|
| 46 |
im = Image.fromarray(detected_image)
|
| 47 |
text = mocr(im)
|
|
@@ -55,13 +68,19 @@ def process_single_image(image, manga_translator, mocr, selected_translator, sel
|
|
| 55 |
})
|
| 56 |
texts_to_translate.append(text)
|
| 57 |
|
| 58 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
if selected_translator == "gemini" and len(texts_to_translate) > 1:
|
| 60 |
# Use batch translation for Gemini
|
| 61 |
try:
|
| 62 |
if manga_translator._gemini_translator is None:
|
| 63 |
from translator.gemini_translator import GeminiTranslator
|
| 64 |
-
api_key = manga_translator
|
|
|
|
|
|
|
| 65 |
custom_prompt = getattr(manga_translator, '_gemini_custom_prompt', None)
|
| 66 |
manga_translator._gemini_translator = GeminiTranslator(
|
| 67 |
api_key=api_key,
|
|
@@ -76,35 +95,309 @@ def process_single_image(image, manga_translator, mocr, selected_translator, sel
|
|
| 76 |
except Exception as e:
|
| 77 |
print(f"Batch translation failed, falling back to single: {e}")
|
| 78 |
translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
else:
|
| 80 |
# Single translation for other translators
|
| 81 |
translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
|
| 82 |
|
| 83 |
# Phase 3: Add translated text to bubbles
|
| 84 |
-
|
|
|
|
| 85 |
for data, translated_text in zip(bubble_data, translated_texts):
|
| 86 |
add_text(data['detected_image'], translated_text, font_path, data['contour'])
|
| 87 |
|
| 88 |
return image
|
| 89 |
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
@app.route("/translate", methods=["POST"])
|
| 92 |
def upload_file():
|
| 93 |
# Get translator selection
|
| 94 |
translator_map = {
|
| 95 |
"Opus-mt model": "hf",
|
| 96 |
"NLLB": "nllb",
|
| 97 |
-
"Gemini": "gemini"
|
|
|
|
| 98 |
}
|
| 99 |
selected_translator = translator_map.get(
|
| 100 |
request.form["selected_translator"],
|
| 101 |
request.form["selected_translator"].lower()
|
| 102 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# Get font selection
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
# Get OCR engine
|
| 110 |
selected_ocr = request.form.get("selected_ocr", "chrome-lens").lower()
|
|
@@ -167,46 +460,170 @@ def upload_file():
|
|
| 167 |
if selected_translator == "gemini" and style:
|
| 168 |
manga_translator._gemini_custom_prompt = style
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
if selected_ocr == "chrome-lens":
|
| 171 |
mocr = ChromeLensOCR()
|
| 172 |
else:
|
| 173 |
mocr = MangaOcr()
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# Process all images
|
| 176 |
processed_images = []
|
|
|
|
| 177 |
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
try:
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
)
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
encoded_image = base64.b64encode(buffer.tobytes()).decode("utf-8")
|
| 201 |
-
|
| 202 |
processed_images.append({
|
| 203 |
-
"name": name,
|
| 204 |
"data": encoded_image
|
| 205 |
})
|
| 206 |
-
|
| 207 |
except Exception as e:
|
| 208 |
-
print(f"Error
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
if not processed_images:
|
| 212 |
return redirect("/")
|
|
@@ -214,5 +631,43 @@ def upload_file():
|
|
| 214 |
return render_template("translate.html", images=processed_images)
|
| 215 |
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
if __name__ == "__main__":
|
| 218 |
-
|
|
|
|
| 1 |
+
from flask import Flask, render_template, request, redirect, send_file, jsonify
|
| 2 |
+
from flask_socketio import SocketIO, emit
|
| 3 |
+
import io
|
| 4 |
+
import zipfile
|
| 5 |
+
import json
|
| 6 |
from detect_bubbles import detect_bubbles
|
| 7 |
from process_bubble import process_bubble
|
| 8 |
from translator.translator import MangaTranslator
|
|
|
|
| 18 |
|
| 19 |
app = Flask(__name__)
|
| 20 |
app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", "secret_key")
|
| 21 |
+
# No upload size limit (removed MAX_CONTENT_LENGTH restriction)
|
| 22 |
+
|
| 23 |
+
# Initialize SocketIO for real-time progress updates
|
| 24 |
+
socketio = SocketIO(app, cors_allowed_origins="*", async_mode='threading')
|
| 25 |
|
| 26 |
MODEL_PATH = "model/model.pt"
|
| 27 |
|
|
|
|
| 31 |
return render_template("index.html")
|
| 32 |
|
| 33 |
|
| 34 |
+
def process_single_image(image, manga_translator, mocr, selected_translator, selected_font, font_analyzer=None):
|
| 35 |
"""Process a single image and return the translated version.
|
| 36 |
|
| 37 |
Optimized with batch translation for Gemini to reduce API calls.
|
| 38 |
+
Supports auto font matching when font_analyzer is provided and selected_font is 'auto'.
|
| 39 |
"""
|
| 40 |
results = detect_bubbles(MODEL_PATH, image)
|
| 41 |
|
|
|
|
| 45 |
# Phase 1: Collect all bubble data and OCR texts
|
| 46 |
bubble_data = []
|
| 47 |
texts_to_translate = []
|
| 48 |
+
first_bubble_image = None # For font analysis
|
| 49 |
|
| 50 |
for result in results:
|
| 51 |
x1, y1, x2, y2, score, class_id = result
|
| 52 |
detected_image = image[int(y1):int(y2), int(x1):int(x2)]
|
| 53 |
|
| 54 |
+
# Save first bubble for font analysis (before processing)
|
| 55 |
+
if first_bubble_image is None:
|
| 56 |
+
first_bubble_image = detected_image.copy()
|
| 57 |
+
|
| 58 |
# Fix: detected_image is already uint8, no need to multiply by 255
|
| 59 |
im = Image.fromarray(detected_image)
|
| 60 |
text = mocr(im)
|
|
|
|
| 68 |
})
|
| 69 |
texts_to_translate.append(text)
|
| 70 |
|
| 71 |
+
# Auto font matching: analyze first bubble and select best font
|
| 72 |
+
# Note: font is now determined BEFORE processing, passed as selected_font
|
| 73 |
+
# (Analysis moved to upload_file to only run once per batch)
|
| 74 |
+
|
| 75 |
+
# Phase 2: Batch translate
|
| 76 |
if selected_translator == "gemini" and len(texts_to_translate) > 1:
|
| 77 |
# Use batch translation for Gemini
|
| 78 |
try:
|
| 79 |
if manga_translator._gemini_translator is None:
|
| 80 |
from translator.gemini_translator import GeminiTranslator
|
| 81 |
+
api_key = getattr(manga_translator, '_gemini_api_key', None)
|
| 82 |
+
if not api_key:
|
| 83 |
+
raise ValueError("Gemini API key not provided")
|
| 84 |
custom_prompt = getattr(manga_translator, '_gemini_custom_prompt', None)
|
| 85 |
manga_translator._gemini_translator = GeminiTranslator(
|
| 86 |
api_key=api_key,
|
|
|
|
| 95 |
except Exception as e:
|
| 96 |
print(f"Batch translation failed, falling back to single: {e}")
|
| 97 |
translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
|
| 98 |
+
|
| 99 |
+
elif selected_translator == "copilot" and len(texts_to_translate) > 1:
|
| 100 |
+
# Use batch translation for Copilot
|
| 101 |
+
try:
|
| 102 |
+
if not hasattr(manga_translator, '_copilot_translator') or manga_translator._copilot_translator is None:
|
| 103 |
+
from translator.copilot_translator import CopilotTranslator
|
| 104 |
+
copilot_server = getattr(manga_translator, '_copilot_server', 'http://localhost:8080')
|
| 105 |
+
copilot_model = getattr(manga_translator, '_copilot_model', 'gpt-4o')
|
| 106 |
+
manga_translator._copilot_translator = CopilotTranslator(
|
| 107 |
+
server_url=copilot_server,
|
| 108 |
+
model=copilot_model
|
| 109 |
+
)
|
| 110 |
+
print(f"Copilot translator initialized: {copilot_server} / {copilot_model}")
|
| 111 |
+
|
| 112 |
+
translated_texts = manga_translator._copilot_translator.translate_batch(
|
| 113 |
+
texts_to_translate,
|
| 114 |
+
source=manga_translator.source,
|
| 115 |
+
target=manga_translator.target
|
| 116 |
+
)
|
| 117 |
+
except Exception as e:
|
| 118 |
+
print(f"Copilot batch translation failed: {e}")
|
| 119 |
+
translated_texts = texts_to_translate # Return original on error
|
| 120 |
+
|
| 121 |
else:
|
| 122 |
# Single translation for other translators
|
| 123 |
translated_texts = [manga_translator.translate(t, method=selected_translator) for t in texts_to_translate]
|
| 124 |
|
| 125 |
# Phase 3: Add translated text to bubbles
|
| 126 |
+
# Determine correct font path based on font name
|
| 127 |
+
font_path = get_font_path(selected_font)
|
| 128 |
for data, translated_text in zip(bubble_data, translated_texts):
|
| 129 |
add_text(data['detected_image'], translated_text, font_path, data['contour'])
|
| 130 |
|
| 131 |
return image
|
| 132 |
|
| 133 |
|
| 134 |
+
def get_font_path(font_name: str) -> str:
|
| 135 |
+
"""Get the correct font file path based on font name."""
|
| 136 |
+
# Handle legacy fonts with 'i' suffix
|
| 137 |
+
if font_name in ["animeace_", "arial", "mangat"]:
|
| 138 |
+
return f"fonts/{font_name}i.ttf"
|
| 139 |
+
# Yuki-* fonts use exact name
|
| 140 |
+
elif font_name.startswith("Yuki-") or font_name.startswith("yuki-"):
|
| 141 |
+
return f"fonts/{font_name}.ttf"
|
| 142 |
+
else:
|
| 143 |
+
return f"fonts/{font_name}.ttf"
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def process_images_with_batch(images_data, manga_translator, mocr, selected_font, translator_type, batch_size=10, use_context_memory=True):
|
| 147 |
+
"""
|
| 148 |
+
Process multiple images with multi-page batching for Copilot or Gemini.
|
| 149 |
+
Collects all texts first, batch translates, then applies translations.
|
| 150 |
+
|
| 151 |
+
Args:
|
| 152 |
+
images_data: List of dicts with 'image', 'name' keys
|
| 153 |
+
manga_translator: MangaTranslator instance with translator
|
| 154 |
+
mocr: OCR engine
|
| 155 |
+
selected_font: Font to use
|
| 156 |
+
translator_type: 'copilot' or 'gemini'
|
| 157 |
+
batch_size: Number of pages per API call
|
| 158 |
+
use_context_memory: Whether to include context from all pages for better translation
|
| 159 |
+
|
| 160 |
+
Returns:
|
| 161 |
+
List of processed images with translations applied
|
| 162 |
+
"""
|
| 163 |
+
import time
|
| 164 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 165 |
+
|
| 166 |
+
def emit_progress(phase, current, total, message):
|
| 167 |
+
"""Emit progress update via WebSocket."""
|
| 168 |
+
try:
|
| 169 |
+
socketio.emit('progress', {
|
| 170 |
+
'phase': phase,
|
| 171 |
+
'current': current,
|
| 172 |
+
'total': total,
|
| 173 |
+
'message': message,
|
| 174 |
+
'percent': int((current / max(total, 1)) * 100)
|
| 175 |
+
})
|
| 176 |
+
except Exception as e:
|
| 177 |
+
pass # Silently fail if socket not connected
|
| 178 |
+
|
| 179 |
+
total_images = len(images_data)
|
| 180 |
+
print(f"\n{'='*50}")
|
| 181 |
+
print(f"Processing {total_images} images...")
|
| 182 |
+
print(f"Context Memory: {'ON' if use_context_memory else 'OFF'}")
|
| 183 |
+
print(f"{'='*50}")
|
| 184 |
+
|
| 185 |
+
start_time = time.time()
|
| 186 |
+
|
| 187 |
+
# Check if using Chrome Lens OCR (has batch support)
|
| 188 |
+
use_batch_ocr = hasattr(mocr, 'process_batch')
|
| 189 |
+
|
| 190 |
+
# Phase 1a: Detect bubbles and collect all bubble images
|
| 191 |
+
print("\n[Phase 1] Detecting bubbles...")
|
| 192 |
+
emit_progress('detection', 0, total_images, 'Bắt đầu phát hiện speech bubbles...')
|
| 193 |
+
all_pages_data = {} # {page_name: {'image': img, 'bubbles': [...], 'bubble_images': [...]}}
|
| 194 |
+
all_bubble_images = [] # Flat list for batch OCR
|
| 195 |
+
bubble_mapping = [] # [(page_name, bubble_idx), ...] to map back
|
| 196 |
+
|
| 197 |
+
for idx, img_data in enumerate(images_data):
|
| 198 |
+
image = img_data['image']
|
| 199 |
+
name = img_data['name']
|
| 200 |
+
|
| 201 |
+
emit_progress('detection', idx + 1, total_images, f'Phát hiện bubbles: {name}')
|
| 202 |
+
print(f" [{idx+1}/{total_images}] {name}", end="", flush=True)
|
| 203 |
+
|
| 204 |
+
results = detect_bubbles(MODEL_PATH, image)
|
| 205 |
+
if not results:
|
| 206 |
+
all_pages_data[name] = {'image': image, 'bubbles': [], 'texts': []}
|
| 207 |
+
print(f" - 0 bubbles")
|
| 208 |
+
continue
|
| 209 |
+
|
| 210 |
+
print(f" - {len(results)} bubbles")
|
| 211 |
+
|
| 212 |
+
bubble_data = []
|
| 213 |
+
|
| 214 |
+
for bubble_idx, result in enumerate(results):
|
| 215 |
+
x1, y1, x2, y2, score, class_id = result
|
| 216 |
+
detected_image = image[int(y1):int(y2), int(x1):int(x2)]
|
| 217 |
+
|
| 218 |
+
# IMPORTANT: Add to OCR queue BEFORE processing (which fills white)
|
| 219 |
+
all_bubble_images.append(Image.fromarray(detected_image.copy()))
|
| 220 |
+
bubble_mapping.append((name, bubble_idx))
|
| 221 |
+
|
| 222 |
+
# Process bubble (fill white) - this modifies the original image via view
|
| 223 |
+
processed_image, cont = process_bubble(detected_image)
|
| 224 |
+
|
| 225 |
+
bubble_data.append({
|
| 226 |
+
'detected_image': processed_image,
|
| 227 |
+
'contour': cont,
|
| 228 |
+
'coords': (int(x1), int(y1), int(x2), int(y2))
|
| 229 |
+
})
|
| 230 |
+
|
| 231 |
+
all_pages_data[name] = {
|
| 232 |
+
'image': image,
|
| 233 |
+
'bubbles': bubble_data,
|
| 234 |
+
'texts': [] # Will fill after OCR
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
detection_time = time.time() - start_time
|
| 238 |
+
print(f"✓ Bubble detection completed in {detection_time:.1f}s ({len(all_bubble_images)} total bubbles)")
|
| 239 |
+
emit_progress('detection', total_images, total_images, f'Phát hiện xong {len(all_bubble_images)} bubbles')
|
| 240 |
+
|
| 241 |
+
# Phase 1b: Batch OCR all bubbles at once
|
| 242 |
+
if all_bubble_images:
|
| 243 |
+
ocr_start = time.time()
|
| 244 |
+
emit_progress('ocr', 0, 1, f'Đang OCR {len(all_bubble_images)} bubbles...')
|
| 245 |
+
print(f"\n[Phase 2] OCR processing {len(all_bubble_images)} bubbles...", end=" ", flush=True)
|
| 246 |
+
|
| 247 |
+
if use_batch_ocr:
|
| 248 |
+
# Use concurrent batch OCR (Chrome Lens)
|
| 249 |
+
all_texts = mocr.process_batch(all_bubble_images)
|
| 250 |
+
else:
|
| 251 |
+
# Sequential OCR (MangaOcr or others)
|
| 252 |
+
all_texts = [mocr(img) for img in all_bubble_images]
|
| 253 |
+
|
| 254 |
+
# Map texts back to pages
|
| 255 |
+
for (page_name, bubble_idx), text in zip(bubble_mapping, all_texts):
|
| 256 |
+
all_pages_data[page_name]['texts'].append(text)
|
| 257 |
+
|
| 258 |
+
ocr_time = time.time() - ocr_start
|
| 259 |
+
print(f"({ocr_time:.1f}s)")
|
| 260 |
+
print(f"✓ OCR completed in {ocr_time:.1f}s ({len(all_bubble_images)/ocr_time:.1f} bubbles/sec)")
|
| 261 |
+
emit_progress('ocr', 1, 1, f'OCR hoàn tất ({len(all_bubble_images)} bubbles)')
|
| 262 |
+
|
| 263 |
+
# Phase 3: Batch translate all pages together
|
| 264 |
+
emit_progress('translation', 0, 1, 'Đang dịch...')
|
| 265 |
+
pages_texts = {name: data['texts'] for name, data in all_pages_data.items() if data['texts']}
|
| 266 |
+
all_translations = {}
|
| 267 |
+
|
| 268 |
+
if pages_texts:
|
| 269 |
+
# Get the translator based on type
|
| 270 |
+
if translator_type == "copilot" and hasattr(manga_translator, '_copilot_translator') and manga_translator._copilot_translator:
|
| 271 |
+
translator = manga_translator._copilot_translator
|
| 272 |
+
translator_name = "Copilot"
|
| 273 |
+
elif translator_type == "gemini" and hasattr(manga_translator, '_gemini_translator') and manga_translator._gemini_translator:
|
| 274 |
+
translator = manga_translator._gemini_translator
|
| 275 |
+
translator_name = "Gemini"
|
| 276 |
+
else:
|
| 277 |
+
translator = None
|
| 278 |
+
translator_name = "Unknown"
|
| 279 |
+
|
| 280 |
+
if translator:
|
| 281 |
+
print(f"{translator_name} batch translating {len(pages_texts)} pages in chunks of {batch_size}...")
|
| 282 |
+
|
| 283 |
+
# Build full context from ALL pages if context memory is enabled
|
| 284 |
+
all_context = None
|
| 285 |
+
if use_context_memory:
|
| 286 |
+
all_context = pages_texts # Pass all texts for context
|
| 287 |
+
print(f" Using context from all {len(pages_texts)} pages")
|
| 288 |
+
|
| 289 |
+
# Process in batches
|
| 290 |
+
page_names = list(pages_texts.keys())
|
| 291 |
+
|
| 292 |
+
for i in range(0, len(page_names), batch_size):
|
| 293 |
+
batch_names = page_names[i:i + batch_size]
|
| 294 |
+
batch_texts = {name: pages_texts[name] for name in batch_names}
|
| 295 |
+
|
| 296 |
+
print(f" Translating batch {i//batch_size + 1}: pages {i+1}-{min(i+batch_size, len(page_names))}")
|
| 297 |
+
|
| 298 |
+
try:
|
| 299 |
+
translated = translator.translate_pages_batch(
|
| 300 |
+
batch_texts,
|
| 301 |
+
source=manga_translator.source,
|
| 302 |
+
target=manga_translator.target,
|
| 303 |
+
context=all_context if use_context_memory else None
|
| 304 |
+
)
|
| 305 |
+
all_translations.update(translated)
|
| 306 |
+
except Exception as e:
|
| 307 |
+
print(f" Batch failed: {e}, falling back to individual translation")
|
| 308 |
+
for name, texts in batch_texts.items():
|
| 309 |
+
try:
|
| 310 |
+
all_translations[name] = translator.translate_batch(
|
| 311 |
+
texts, manga_translator.source, manga_translator.target
|
| 312 |
+
)
|
| 313 |
+
except:
|
| 314 |
+
all_translations[name] = texts # Return original on error
|
| 315 |
+
|
| 316 |
+
translation_time = time.time() - start_time - detection_time
|
| 317 |
+
print(f"✓ Translation completed in {translation_time:.1f}s")
|
| 318 |
+
emit_progress('translation', 1, 1, 'Dịch hoàn tất')
|
| 319 |
+
|
| 320 |
+
# Phase 4: Apply translations and render text
|
| 321 |
+
emit_progress('rendering', 0, total_images, 'Đang render text vào ảnh...')
|
| 322 |
+
render_start = time.time()
|
| 323 |
+
processed_results = []
|
| 324 |
+
font_path = get_font_path(selected_font)
|
| 325 |
+
|
| 326 |
+
print(f"\n[Phase 4] Rendering text...")
|
| 327 |
+
|
| 328 |
+
render_idx = 0
|
| 329 |
+
for name, data in all_pages_data.items():
|
| 330 |
+
render_idx += 1
|
| 331 |
+
emit_progress('rendering', render_idx, total_images, f'Render text: {name}')
|
| 332 |
+
|
| 333 |
+
image = data['image']
|
| 334 |
+
bubbles = data['bubbles']
|
| 335 |
+
translated_texts = all_translations.get(name, data['texts']) # Fallback to original
|
| 336 |
+
|
| 337 |
+
# Apply text to bubbles on the ORIGINAL image
|
| 338 |
+
for bubble, text in zip(bubbles, translated_texts):
|
| 339 |
+
x1, y1, x2, y2 = bubble['coords']
|
| 340 |
+
# Get the region in the original image (this is a view, modifications affect original)
|
| 341 |
+
bubble_region = image[y1:y2, x1:x2]
|
| 342 |
+
# Fill with white first (process_bubble already did this but let's be safe)
|
| 343 |
+
# bubble_region[:] = (255, 255, 255) # Already done
|
| 344 |
+
# Add translated text
|
| 345 |
+
add_text(bubble_region, text, font_path, bubble['contour'])
|
| 346 |
+
|
| 347 |
+
processed_results.append({
|
| 348 |
+
'image': image,
|
| 349 |
+
'name': name
|
| 350 |
+
})
|
| 351 |
+
|
| 352 |
+
render_time = time.time() - render_start
|
| 353 |
+
total_time = time.time() - start_time
|
| 354 |
+
|
| 355 |
+
print(f"✓ Text rendering completed in {render_time:.1f}s")
|
| 356 |
+
print(f"{'='*50}")
|
| 357 |
+
print(f"✓ TOTAL: {total_images} images processed in {total_time:.1f}s ({total_time/total_images:.1f}s/image)")
|
| 358 |
+
print(f"{'='*50}\n")
|
| 359 |
+
|
| 360 |
+
emit_progress('done', total_images, total_images, f'Hoàn tất! {total_images} ảnh trong {total_time:.1f}s')
|
| 361 |
+
|
| 362 |
+
return processed_results
|
| 363 |
+
|
| 364 |
+
|
| 365 |
@app.route("/translate", methods=["POST"])
|
| 366 |
def upload_file():
|
| 367 |
# Get translator selection
|
| 368 |
translator_map = {
|
| 369 |
"Opus-mt model": "hf",
|
| 370 |
"NLLB": "nllb",
|
| 371 |
+
"Gemini": "gemini",
|
| 372 |
+
"Copilot": "copilot"
|
| 373 |
}
|
| 374 |
selected_translator = translator_map.get(
|
| 375 |
request.form["selected_translator"],
|
| 376 |
request.form["selected_translator"].lower()
|
| 377 |
)
|
| 378 |
+
|
| 379 |
+
# Get Copilot settings if Copilot is selected
|
| 380 |
+
copilot_server = request.form.get("copilot_server", "http://localhost:8080")
|
| 381 |
+
copilot_model = request.form.get("selected_copilot_model", "gpt-4o")
|
| 382 |
+
|
| 383 |
+
# Get Gemini API key from form
|
| 384 |
+
gemini_api_key = request.form.get("gemini_api_key", "").strip()
|
| 385 |
+
|
| 386 |
+
# Get context memory setting (checkbox - "on" if checked, None if not)
|
| 387 |
+
use_context_memory = request.form.get("context_memory") == "on"
|
| 388 |
|
| 389 |
# Get font selection
|
| 390 |
+
selected_font_raw = request.form["selected_font"]
|
| 391 |
+
selected_font = selected_font_raw.lower()
|
| 392 |
+
|
| 393 |
+
# Handle special font name mappings
|
| 394 |
+
if selected_font == "auto (match original)":
|
| 395 |
+
selected_font = "auto"
|
| 396 |
+
elif selected_font == "animeace":
|
| 397 |
+
selected_font = "animeace_"
|
| 398 |
+
elif selected_font_raw.startswith("Yuki-"):
|
| 399 |
+
# Keep original case for Yuki fonts
|
| 400 |
+
selected_font = selected_font_raw
|
| 401 |
|
| 402 |
# Get OCR engine
|
| 403 |
selected_ocr = request.form.get("selected_ocr", "chrome-lens").lower()
|
|
|
|
| 460 |
if selected_translator == "gemini" and style:
|
| 461 |
manga_translator._gemini_custom_prompt = style
|
| 462 |
|
| 463 |
+
# Set Gemini API key
|
| 464 |
+
if selected_translator == "gemini" and gemini_api_key:
|
| 465 |
+
manga_translator._gemini_api_key = gemini_api_key
|
| 466 |
+
print(f"Using Gemini API with provided key")
|
| 467 |
+
|
| 468 |
+
# Set Copilot settings
|
| 469 |
+
if selected_translator == "copilot":
|
| 470 |
+
manga_translator._copilot_server = copilot_server
|
| 471 |
+
manga_translator._copilot_model = copilot_model
|
| 472 |
+
print(f"Using Copilot API: {copilot_server} / model: {copilot_model}")
|
| 473 |
+
|
| 474 |
if selected_ocr == "chrome-lens":
|
| 475 |
mocr = ChromeLensOCR()
|
| 476 |
else:
|
| 477 |
mocr = MangaOcr()
|
| 478 |
|
| 479 |
+
# Initialize font analyzer for auto font matching
|
| 480 |
+
font_analyzer = None
|
| 481 |
+
if selected_font == "auto":
|
| 482 |
+
try:
|
| 483 |
+
from font_analyzer import FontAnalyzer
|
| 484 |
+
# Use same API key as Gemini translator
|
| 485 |
+
api_key = gemini_api_key or os.environ.get("GEMINI_API_KEY")
|
| 486 |
+
if not api_key:
|
| 487 |
+
print("Warning: No Gemini API key provided for font analysis")
|
| 488 |
+
font_analyzer = FontAnalyzer(api_key=api_key)
|
| 489 |
+
print("Font analyzer initialized for auto font matching")
|
| 490 |
+
except Exception as e:
|
| 491 |
+
print(f"Failed to initialize font analyzer: {e}")
|
| 492 |
+
selected_font = "animeace_" # Fallback to default
|
| 493 |
+
|
| 494 |
# Process all images
|
| 495 |
processed_images = []
|
| 496 |
+
auto_font_determined = False # Flag to analyze font only once
|
| 497 |
|
| 498 |
+
# For Copilot and Gemini: Use multi-page batch processing
|
| 499 |
+
if selected_translator in ["copilot", "gemini"]:
|
| 500 |
+
# First, read all images into memory
|
| 501 |
+
all_images = []
|
| 502 |
+
for file in files:
|
| 503 |
+
if file and file.filename:
|
| 504 |
+
try:
|
| 505 |
+
file_stream = file.stream
|
| 506 |
+
file_bytes = np.frombuffer(file_stream.read(), dtype=np.uint8)
|
| 507 |
+
image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
|
| 508 |
+
|
| 509 |
+
if image is None:
|
| 510 |
+
continue
|
| 511 |
+
|
| 512 |
+
name = os.path.splitext(file.filename)[0]
|
| 513 |
+
all_images.append({'image': image, 'name': name})
|
| 514 |
+
except Exception as e:
|
| 515 |
+
print(f"Error reading {file.filename}: {e}")
|
| 516 |
+
|
| 517 |
+
if not all_images:
|
| 518 |
+
return redirect("/")
|
| 519 |
+
|
| 520 |
+
# Auto font: analyze first image
|
| 521 |
+
if selected_font == "auto" and font_analyzer is not None:
|
| 522 |
try:
|
| 523 |
+
results = detect_bubbles(MODEL_PATH, all_images[0]['image'])
|
| 524 |
+
if results:
|
| 525 |
+
x1, y1, x2, y2, _, _ = results[0]
|
| 526 |
+
first_bubble = all_images[0]['image'][int(y1):int(y2), int(x1):int(x2)]
|
| 527 |
+
selected_font = font_analyzer.analyze_and_match(first_bubble)
|
| 528 |
+
print(f"Auto font matched: {selected_font}")
|
| 529 |
+
else:
|
| 530 |
+
selected_font = "animeace_"
|
| 531 |
+
except Exception as e:
|
| 532 |
+
print(f"Font analysis failed: {e}")
|
| 533 |
+
selected_font = "animeace_"
|
| 534 |
+
|
| 535 |
+
# Initialize translator based on type
|
| 536 |
+
if selected_translator == "copilot":
|
| 537 |
+
if not hasattr(manga_translator, '_copilot_translator') or manga_translator._copilot_translator is None:
|
| 538 |
+
from translator.copilot_translator import CopilotTranslator
|
| 539 |
+
manga_translator._copilot_translator = CopilotTranslator(
|
| 540 |
+
server_url=copilot_server,
|
| 541 |
+
model=copilot_model
|
| 542 |
)
|
| 543 |
+
print(f"Copilot translator initialized: {copilot_server} / {copilot_model}")
|
| 544 |
+
|
| 545 |
+
elif selected_translator == "gemini":
|
| 546 |
+
if not hasattr(manga_translator, '_gemini_translator') or manga_translator._gemini_translator is None:
|
| 547 |
+
from translator.gemini_translator import GeminiTranslator
|
| 548 |
+
api_key = gemini_api_key
|
| 549 |
+
if not api_key:
|
| 550 |
+
raise ValueError("Gemini API key required. Please enter it in the web form.")
|
| 551 |
+
custom_prompt = getattr(manga_translator, '_gemini_custom_prompt', None)
|
| 552 |
+
manga_translator._gemini_translator = GeminiTranslator(
|
| 553 |
+
api_key=api_key,
|
| 554 |
+
custom_prompt=custom_prompt
|
| 555 |
+
)
|
| 556 |
+
print("Gemini translator initialized for multi-page batching")
|
| 557 |
+
|
| 558 |
+
# Process with multi-page batching (10 pages per API call)
|
| 559 |
+
processed_results = process_images_with_batch(
|
| 560 |
+
all_images, manga_translator, mocr, selected_font,
|
| 561 |
+
translator_type=selected_translator, batch_size=10,
|
| 562 |
+
use_context_memory=use_context_memory
|
| 563 |
+
)
|
| 564 |
+
|
| 565 |
+
# Encode results to base64
|
| 566 |
+
for result in processed_results:
|
| 567 |
+
try:
|
| 568 |
+
_, buffer = cv2.imencode(".jpg", result['image'], [cv2.IMWRITE_JPEG_QUALITY, 95])
|
| 569 |
encoded_image = base64.b64encode(buffer.tobytes()).decode("utf-8")
|
|
|
|
| 570 |
processed_images.append({
|
| 571 |
+
"name": result['name'],
|
| 572 |
"data": encoded_image
|
| 573 |
})
|
|
|
|
| 574 |
except Exception as e:
|
| 575 |
+
print(f"Error encoding {result['name']}: {e}")
|
| 576 |
+
|
| 577 |
+
else:
|
| 578 |
+
# For other translators: Use per-image processing (original flow)
|
| 579 |
+
for file in files:
|
| 580 |
+
if file and file.filename:
|
| 581 |
+
try:
|
| 582 |
+
# Read image
|
| 583 |
+
file_stream = file.stream
|
| 584 |
+
file_bytes = np.frombuffer(file_stream.read(), dtype=np.uint8)
|
| 585 |
+
image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
|
| 586 |
+
|
| 587 |
+
if image is None:
|
| 588 |
+
continue
|
| 589 |
+
|
| 590 |
+
# Auto font: analyze FIRST image only
|
| 591 |
+
if selected_font == "auto" and font_analyzer is not None and not auto_font_determined:
|
| 592 |
+
try:
|
| 593 |
+
results = detect_bubbles(MODEL_PATH, image)
|
| 594 |
+
if results:
|
| 595 |
+
x1, y1, x2, y2, _, _ = results[0]
|
| 596 |
+
first_bubble = image[int(y1):int(y2), int(x1):int(x2)]
|
| 597 |
+
selected_font = font_analyzer.analyze_and_match(first_bubble)
|
| 598 |
+
print(f"Auto font matched (once for all images): {selected_font}")
|
| 599 |
+
else:
|
| 600 |
+
selected_font = "animeace_"
|
| 601 |
+
except Exception as e:
|
| 602 |
+
print(f"Font analysis failed: {e}")
|
| 603 |
+
selected_font = "animeace_"
|
| 604 |
+
auto_font_determined = True
|
| 605 |
+
|
| 606 |
+
# Get original filename
|
| 607 |
+
name = os.path.splitext(file.filename)[0]
|
| 608 |
+
|
| 609 |
+
# Process image
|
| 610 |
+
processed_image = process_single_image(
|
| 611 |
+
image, manga_translator, mocr,
|
| 612 |
+
selected_translator, selected_font, None
|
| 613 |
+
)
|
| 614 |
+
|
| 615 |
+
# Encode to base64
|
| 616 |
+
_, buffer = cv2.imencode(".jpg", processed_image, [cv2.IMWRITE_JPEG_QUALITY, 95])
|
| 617 |
+
encoded_image = base64.b64encode(buffer.tobytes()).decode("utf-8")
|
| 618 |
+
|
| 619 |
+
processed_images.append({
|
| 620 |
+
"name": name,
|
| 621 |
+
"data": encoded_image
|
| 622 |
+
})
|
| 623 |
+
|
| 624 |
+
except Exception as e:
|
| 625 |
+
print(f"Error processing {file.filename}: {e}")
|
| 626 |
+
continue
|
| 627 |
|
| 628 |
if not processed_images:
|
| 629 |
return redirect("/")
|
|
|
|
| 631 |
return render_template("translate.html", images=processed_images)
|
| 632 |
|
| 633 |
|
| 634 |
+
@app.route("/download-zip", methods=["POST"])
def download_zip():
    """Create and download a ZIP file containing all translated images.

    Expects a POST form field ``images_data`` holding a JSON array of
    ``{"name": ..., "data": <base64>}`` objects (as produced by the
    translate view, which encodes each page as JPEG).

    Returns:
        The ZIP archive as a file attachment, or a redirect to "/" when
        there is nothing to download or an unrecoverable error occurs.
    """
    try:
        images = json.loads(request.form.get("images_data", "[]"))

        if not images:
            return redirect("/")

        # Create ZIP file in memory
        zip_buffer = io.BytesIO()
        used_names = set()  # avoid silent duplicate entries in the archive
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for i, img in enumerate(images):
                name = img.get('name', f'image_{i+1}')
                data = img.get('data', '')

                # Decode base64 per image; skip corrupt entries instead of
                # aborting the whole download on one bad payload.
                try:
                    # Tolerate a data-URL prefix ("data:image/...;base64,").
                    if data.lstrip().startswith("data:") and "," in data:
                        data = data.split(",", 1)[1]
                    image_bytes = base64.b64decode(data, validate=True)
                except (ValueError, TypeError) as decode_err:
                    print(f"Skipping '{name}': invalid image data ({decode_err})")
                    continue
                if not image_bytes:
                    continue

                # Pages are encoded as JPEG upstream (cv2.imencode ".jpg"),
                # so label them .jpg — the old code mislabelled them .png.
                filename = f"{name}_translated.jpg"
                suffix = 1
                while filename in used_names:
                    suffix += 1
                    filename = f"{name}_translated_{suffix}.jpg"
                used_names.add(filename)

                zip_file.writestr(filename, image_bytes)

        zip_buffer.seek(0)

        return send_file(
            zip_buffer,
            mimetype='application/zip',
            as_attachment=True,
            download_name='manga_translated.zip'
        )

    except Exception as e:
        print(f"Error creating ZIP: {e}")
        return redirect("/")
|
| 670 |
+
|
| 671 |
+
|
| 672 |
if __name__ == "__main__":
|
| 673 |
+
socketio.run(app, debug=True)
|
font_analyzer.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Font Analyzer - Analyze manga font style and match with available fonts
|
| 3 |
+
Uses Gemini Vision to directly select the best matching font from available options
|
| 4 |
+
"""
|
| 5 |
+
import google.generativeai as genai
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
from PIL import Image
|
| 9 |
+
import numpy as np
|
| 10 |
+
from typing import Optional, Dict, Any, List
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class FontAnalyzer:
    """
    Analyzes font style from manga speech bubbles using Gemini Vision
    and directly selects the best matching font from available fonts.
    """

    # Available fonts with descriptions for Gemini to understand
    FONT_OPTIONS = {
        "animeace_": "Classic manga font, clean and readable, standard comic style",
        "mangat": "Standard manga font, similar to animeace, good readability",
        "arial": "Clean sans-serif, formal and professional",
        "Yuki-Arenzi": "Simple casual handwritten style",
        "Yuki-Burobu": "Bold brush strokes, dynamic action style, Japanese brush feel",
        "Yuki-CCMarianChurchlandJournal": "Journal/diary handwritten, personal feel",
        "Yuki-CDX Starstreak": "Dynamic sci-fi style, bold and futuristic",
        "Yuki-CHICKEN Pie": "Playful, chunky, cute comedy style",
        "Yuki-CrashLanding BB": "Heavy impact font, bold action/shouting style",
        "Yuki-Downhill Dive": "Dynamic sports/action font, energetic",
        "Yuki-Gingerline DEMO Regular": "Elegant flowing handwritten, romantic style",
        "Yuki-Gorrilaz_Story": "Grunge alternative style, rough edges",
        "Yuki-KG Only Angel": "Delicate feminine handwritten, soft romantic",
        "Yuki-LF SwandsHand": "Natural handwritten, casual personal",
        "Yuki-La Belle Aurore": "Elegant cursive, fancy romantic style",
        "Yuki-Little Cupcakes": "Cute kawaii style, bubbly and fun",
        "Yuki-Nagurigaki Crayon": "Crayon/childish handwritten, playful comedy",
        "Yuki-Ripsnort BB": "Heavy bold impact, action/shouting",
        "Yuki-Roasthink": "Modern clean sans-serif, general purpose",
        "Yuki-Screwball": "Comic style, funny and expressive",
        "Yuki-Shark Crash": "Aggressive dynamic, action manga style",
        "Yuki-Skulduggery": "Gothic dark style, horror/mystery",
        "Yuki-Superscratchy": "Scratchy rough handwritten, grungy feel",
        "Yuki-Tea And Oranges Regular": "Soft warm handwritten, gentle drama",
    }

    # Fallback used whenever analysis fails or returns an unknown name
    DEFAULT_FONT = "animeace_"

    def __init__(self, api_key: Optional[str] = None):
        """Initialize with Gemini API key (argument or GEMINI_API_KEY env var).

        Raises:
            ValueError: if no API key is available from either source.
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
        if not self.api_key:
            raise ValueError("Gemini API key required. Set GEMINI_API_KEY or pass api_key.")

        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel("gemini-2.5-flash-lite")

    def _image_to_pil(self, image) -> "Image.Image":
        """Convert a PIL Image or BGR numpy array (OpenCV) to a PIL RGB Image.

        Raises:
            ValueError: for any other input type.
        """
        if isinstance(image, Image.Image):
            return image
        elif isinstance(image, np.ndarray):
            import cv2
            # OpenCV images are BGR; PIL expects RGB
            if len(image.shape) == 3 and image.shape[2] == 3:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            return Image.fromarray(image)
        else:
            raise ValueError(f"Unsupported image type: {type(image)}")

    def _build_font_list_prompt(self) -> str:
        """Build the "- name: description" font options list for the prompt."""
        return "\n".join(
            f"- {font_name}: {description}"
            for font_name, description in self.FONT_OPTIONS.items()
        )

    def analyze_and_match(self, bubble_image) -> str:
        """
        Analyze the font in the image and directly select the best matching font.

        Args:
            bubble_image: Speech bubble image (PIL, numpy array)

        Returns:
            Font name to use (always a key of FONT_OPTIONS; DEFAULT_FONT on
            any failure or unrecognized model response).
        """
        try:
            pil_image = self._image_to_pil(bubble_image)
            print(f"[FontAnalyzer] Analyzing image size: {pil_image.size}")

            font_list = self._build_font_list_prompt()

            prompt = f"""Look at this manga/comic speech bubble image and analyze the text font style.

Then choose the BEST matching font from this list based on visual similarity:

{font_list}

Consider these factors when matching:
1. Font weight (thin, normal, bold, heavy)
2. Style (clean, handwritten, decorative, brush)
3. Mood/genre (action, comedy, romance, horror, drama, casual)
4. Overall visual feel

Return ONLY the font name (exactly as written above), nothing else.
Example response: Yuki-Burobu"""

            print("[FontAnalyzer] Sending request to Gemini Vision...")
            response = self.model.generate_content([prompt, pil_image])
            result = response.text.strip()

            print(f"[FontAnalyzer] Gemini raw response: '{result}'")

            # Clean up response
            result = result.replace('"', '').replace("'", "").strip()

            # Remove common prefixes that Gemini might add
            prefixes_to_remove = ["The best matching font is ", "Best match: ", "Font: ", "I recommend "]
            for prefix in prefixes_to_remove:
                if result.lower().startswith(prefix.lower()):
                    result = result[len(prefix):].strip()

            print(f"[FontAnalyzer] Cleaned response: '{result}'")

            # Validate the result is in our font list
            if result in self.FONT_OPTIONS:
                print(f"[FontAnalyzer] ✓ Matched: {result}")
                return result

            # Try to find partial match (case-insensitive).  Guard against an
            # empty response: "" is a substring of every font name, so without
            # the guard a blank reply would "match" the first font in the dict.
            result_lower = result.lower()
            if result_lower:
                for font_name in self.FONT_OPTIONS.keys():
                    if font_name.lower() == result_lower:
                        print(f"[FontAnalyzer] ✓ Matched (case-insensitive): {font_name}")
                        return font_name
                    if font_name.lower() in result_lower or result_lower in font_name.lower():
                        print(f"[FontAnalyzer] ✓ Matched (partial): {font_name}")
                        return font_name

            print(f"[FontAnalyzer] ✗ Font not in list: '{result}', using default")
            return self.DEFAULT_FONT

        except Exception as e:
            print(f"[FontAnalyzer] ✗ Error: {e}")
            return self.DEFAULT_FONT
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def get_matching_font(bubble_image, api_key: str = None) -> str:
    """Convenience wrapper: build a FontAnalyzer and match a font for *bubble_image*."""
    return FontAnalyzer(api_key).analyze_and_match(bubble_image)
|
fonts/Yuki-Arenzi.ttf
ADDED
|
Binary file (47.8 kB). View file
|
|
|
fonts/Yuki-Burobu.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09a3b22d7035b4726304fb383cf80e2421c47cf05615d2f75143b24147bcef7a
|
| 3 |
+
size 176976
|
fonts/Yuki-CCMarianChurchlandJournal.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c5c4ac2b3daf8f7062d745300b2e8dd12b2ee206db7dd427143cc3f78a8e831
|
| 3 |
+
size 148928
|
fonts/Yuki-CDX Starstreak.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:931d143968eca5b237efdba4538ddd79a8113a438c9d0b479244a660cc099973
|
| 3 |
+
size 152740
|
fonts/Yuki-CHICKEN Pie.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12b1128c44ecc4819fc67615966260cca27ac68a6b21f4c2a99d697656f3cfe2
|
| 3 |
+
size 100624
|
fonts/Yuki-CrashLanding BB.ttf
ADDED
|
Binary file (49.4 kB). View file
|
|
|
fonts/Yuki-Downhill Dive.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a19185b9489d1ff3897d178e7859676d58c1a8ab81beee9e93b662a1a8a0383d
|
| 3 |
+
size 345480
|
fonts/Yuki-Gingerline DEMO Regular.ttf
ADDED
|
Binary file (82.5 kB). View file
|
|
|
fonts/Yuki-Gorrilaz_Story.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:331cec198091d01a819b0dfb4be4576cdc27a0774397a4ec7a9b10a527a5d161
|
| 3 |
+
size 115792
|
fonts/Yuki-KG Only Angel.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f07ee1dfc8c198e19dfe1101b2bb1d84c80592ed54fc6caf2412e50d36b22903
|
| 3 |
+
size 440976
|
fonts/Yuki-LF SwandsHand.ttf
ADDED
|
Binary file (70.9 kB). View file
|
|
|
fonts/Yuki-La Belle Aurore.ttf
ADDED
|
Binary file (88.9 kB). View file
|
|
|
fonts/Yuki-Little Cupcakes.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30a1f57bac5c5fcb5739008d33ede51e5ecc8a76a39f268ecbeb4b0c0e45fa68
|
| 3 |
+
size 114520
|
fonts/Yuki-Nagurigaki Crayon.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bece77302985b034d3e7beff562853e17084b87d2b2fef6ba784fdc953660586
|
| 3 |
+
size 5462384
|
fonts/Yuki-Ripsnort BB.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf74ab38ba5767007fbc4b0cf8cfa432620a22d431be3d4b944df1dd3ca1b2f3
|
| 3 |
+
size 115368
|
fonts/Yuki-Roasthink.ttf
ADDED
|
Binary file (68.4 kB). View file
|
|
|
fonts/Yuki-Screwball.ttf
ADDED
|
Binary file (99.1 kB). View file
|
|
|
fonts/Yuki-Shark Crash.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12195563cabdc781dffa6517ad1f029fc69dcb968e9ff49ec68f3c7216cc4c3c
|
| 3 |
+
size 148464
|
fonts/Yuki-Skulduggery.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06cbbcaa3cfcbf96482c18301130e48405c06ff205c5a225abb44d5b56f7d299
|
| 3 |
+
size 434812
|
fonts/Yuki-Superscratchy.ttf
ADDED
|
Binary file (68.4 kB). View file
|
|
|
fonts/Yuki-Tea And Oranges Regular.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a448eb8e6301e6059678171d36c95049723d6a3d26fcb4d33b9b62e40397df9
|
| 3 |
+
size 492108
|
ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc
CHANGED
|
Binary files a/ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc and b/ocr/__pycache__/chrome_lens_ocr.cpython-311.pyc differ
|
|
|
ocr/chrome_lens_ocr.py
CHANGED
|
@@ -17,6 +17,7 @@ class ChromeLensOCR:
|
|
| 17 |
- Free Google Lens OCR API
|
| 18 |
- Multi-language support with auto-detection
|
| 19 |
- Text block segmentation for comics/manga
|
|
|
|
| 20 |
"""
|
| 21 |
|
| 22 |
def __init__(self, ocr_language: str = "ja"):
|
|
@@ -77,6 +78,62 @@ class ChromeLensOCR:
|
|
| 77 |
print(f"Chrome Lens OCR error: {e}")
|
| 78 |
return ""
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
async def process_with_blocks(self, image) -> dict:
|
| 81 |
"""
|
| 82 |
Process image and return text segmented into blocks.
|
|
@@ -114,3 +171,4 @@ class ChromeLensOCR:
|
|
| 114 |
|
| 115 |
result = asyncio.run(self.process_with_blocks(image))
|
| 116 |
return result.get("text_blocks", [])
|
|
|
|
|
|
| 17 |
- Free Google Lens OCR API
|
| 18 |
- Multi-language support with auto-detection
|
| 19 |
- Text block segmentation for comics/manga
|
| 20 |
+
- Batch processing for faster multi-image OCR
|
| 21 |
"""
|
| 22 |
|
| 23 |
def __init__(self, ocr_language: str = "ja"):
|
|
|
|
| 78 |
print(f"Chrome Lens OCR error: {e}")
|
| 79 |
return ""
|
| 80 |
|
| 81 |
+
def process_batch(self, images: list) -> list:
    """
    Process multiple images concurrently for faster OCR.

    Args:
        images: List of PIL Images or numpy arrays

    Returns:
        list: List of extracted texts in same order
    """
    # Normalize input up front so the async workers all receive PIL Images.
    pil_images = [
        Image.fromarray(img) if isinstance(img, np.ndarray) else img
        for img in images
    ]

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop running in this thread: run the batch directly on a
        # fresh loop (asyncio.run also closes it, unlike the old cached
        # self._loop which leaked).
        return asyncio.run(self._process_batch(pil_images))

    # An event loop IS running in this thread.  The old code scheduled the
    # coroutine onto that same loop with run_coroutine_threadsafe and then
    # blocked on future.result() — a guaranteed deadlock, because the loop
    # can never execute the coroutine while its own thread is blocked.
    # Instead, run the batch on a separate worker thread with its own loop.
    import concurrent.futures
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(asyncio.run, self._process_batch(pil_images))
        return future.result(timeout=120)
|
| 111 |
+
|
| 112 |
+
async def _process_batch(self, images: list) -> list:
|
| 113 |
+
"""
|
| 114 |
+
Async batch processing using asyncio.gather for concurrent OCR.
|
| 115 |
+
|
| 116 |
+
Args:
|
| 117 |
+
images: List of PIL Images
|
| 118 |
+
|
| 119 |
+
Returns:
|
| 120 |
+
list: List of extracted texts
|
| 121 |
+
"""
|
| 122 |
+
# Process all images concurrently
|
| 123 |
+
tasks = [self._process(img) for img in images]
|
| 124 |
+
results = await asyncio.gather(*tasks, return_exceptions=True)
|
| 125 |
+
|
| 126 |
+
# Handle any exceptions
|
| 127 |
+
processed = []
|
| 128 |
+
for r in results:
|
| 129 |
+
if isinstance(r, Exception):
|
| 130 |
+
print(f"Batch OCR error: {r}")
|
| 131 |
+
processed.append("")
|
| 132 |
+
else:
|
| 133 |
+
processed.append(r)
|
| 134 |
+
|
| 135 |
+
return processed
|
| 136 |
+
|
| 137 |
async def process_with_blocks(self, image) -> dict:
|
| 138 |
"""
|
| 139 |
Process image and return text segmented into blocks.
|
|
|
|
| 171 |
|
| 172 |
result = asyncio.run(self.process_with_blocks(image))
|
| 173 |
return result.get("text_blocks", [])
|
| 174 |
+
|
process_bubble.py
CHANGED
|
@@ -11,12 +11,22 @@ def process_bubble(image):
|
|
| 11 |
|
| 12 |
Returns:
|
| 13 |
- image (numpy.ndarray): Image with the speech bubble content set to white.
|
| 14 |
-
- largest_contour (numpy.ndarray): Contour of the detected speech bubble.
|
| 15 |
"""
|
| 16 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 17 |
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
|
| 18 |
|
| 19 |
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
largest_contour = max(contours, key=cv2.contourArea)
|
| 21 |
|
| 22 |
mask = np.zeros_like(gray)
|
|
@@ -25,3 +35,4 @@ def process_bubble(image):
|
|
| 25 |
image[mask == 255] = (255, 255, 255)
|
| 26 |
|
| 27 |
return image, largest_contour
|
|
|
|
|
|
| 11 |
|
| 12 |
Returns:
|
| 13 |
- image (numpy.ndarray): Image with the speech bubble content set to white.
|
| 14 |
+
- largest_contour (numpy.ndarray): Contour of the detected speech bubble (or None if not found).
|
| 15 |
"""
|
| 16 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 17 |
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
|
| 18 |
|
| 19 |
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 20 |
+
|
| 21 |
+
# Handle case when no contours found
|
| 22 |
+
if not contours:
|
| 23 |
+
# Return original image with a simple rectangular contour
|
| 24 |
+
h, w = image.shape[:2]
|
| 25 |
+
largest_contour = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=np.int32)
|
| 26 |
+
# Fill with white anyway
|
| 27 |
+
image[:] = (255, 255, 255)
|
| 28 |
+
return image, largest_contour
|
| 29 |
+
|
| 30 |
largest_contour = max(contours, key=cv2.contourArea)
|
| 31 |
|
| 32 |
mask = np.zeros_like(gray)
|
|
|
|
| 35 |
image[mask == 255] = (255, 255, 255)
|
| 36 |
|
| 37 |
return image, largest_contour
|
| 38 |
+
|
static/css/style.css
CHANGED
|
@@ -334,6 +334,55 @@ button:active {
|
|
| 334 |
color: white;
|
| 335 |
}
|
| 336 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
/* Responsive */
|
| 338 |
@media (max-width: 600px) {
|
| 339 |
.form-grid {
|
|
@@ -344,3 +393,4 @@ button:active {
|
|
| 344 |
padding: 20px;
|
| 345 |
}
|
| 346 |
}
|
|
|
|
|
|
| 334 |
color: white;
|
| 335 |
}
|
| 336 |
|
| 337 |
+
/* Toggle Switch */
|
| 338 |
+
.toggle-container {
|
| 339 |
+
display: flex;
|
| 340 |
+
align-items: center;
|
| 341 |
+
cursor: pointer;
|
| 342 |
+
gap: 12px;
|
| 343 |
+
user-select: none;
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
.toggle-container input {
|
| 347 |
+
display: none;
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
.toggle-slider {
|
| 351 |
+
position: relative;
|
| 352 |
+
width: 50px;
|
| 353 |
+
height: 26px;
|
| 354 |
+
background-color: #ccc;
|
| 355 |
+
border-radius: 26px;
|
| 356 |
+
transition: background-color 0.3s;
|
| 357 |
+
flex-shrink: 0;
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
.toggle-slider::before {
|
| 361 |
+
content: '';
|
| 362 |
+
position: absolute;
|
| 363 |
+
width: 22px;
|
| 364 |
+
height: 22px;
|
| 365 |
+
border-radius: 50%;
|
| 366 |
+
background-color: white;
|
| 367 |
+
top: 2px;
|
| 368 |
+
left: 2px;
|
| 369 |
+
transition: transform 0.3s;
|
| 370 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
.toggle-container input:checked + .toggle-slider {
|
| 374 |
+
background-color: #5E1675;
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
.toggle-container input:checked + .toggle-slider::before {
|
| 378 |
+
transform: translateX(24px);
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
.toggle-label {
|
| 382 |
+
font-size: 13px;
|
| 383 |
+
color: #333;
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
/* Responsive */
|
| 387 |
@media (max-width: 600px) {
|
| 388 |
.form-grid {
|
|
|
|
| 393 |
padding: 20px;
|
| 394 |
}
|
| 395 |
}
|
| 396 |
+
|
static/js/app.js
CHANGED
|
@@ -38,6 +38,23 @@ document.addEventListener("DOMContentLoaded", () => {
|
|
| 38 |
customWrapper.style.display = 'none';
|
| 39 |
}
|
| 40 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
});
|
| 42 |
});
|
| 43 |
|
|
@@ -49,6 +66,33 @@ document.addEventListener("DOMContentLoaded", () => {
|
|
| 49 |
}
|
| 50 |
});
|
| 51 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
});
|
| 53 |
|
| 54 |
// Handles multiple file upload change event
|
|
@@ -108,6 +152,17 @@ function updateHiddenInputs() {
|
|
| 108 |
document.getElementById("selected_style").value = getSelectedText("style");
|
| 109 |
document.getElementById("selected_font").value = getSelectedText("font");
|
| 110 |
document.getElementById("selected_ocr").value = getSelectedText("ocr");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
// Check if files are selected
|
| 113 |
const files = document.getElementById('file-upload').files;
|
|
|
|
| 38 |
customWrapper.style.display = 'none';
|
| 39 |
}
|
| 40 |
}
|
| 41 |
+
|
| 42 |
+
// Show/hide translator-specific settings
|
| 43 |
+
if (selectBox.id === 'translator') {
|
| 44 |
+
const copilotSettings = document.getElementById('copilot-settings');
|
| 45 |
+
const geminiSettings = document.getElementById('gemini-settings');
|
| 46 |
+
|
| 47 |
+
if (option.textContent === 'Copilot') {
|
| 48 |
+
copilotSettings.style.display = 'block';
|
| 49 |
+
geminiSettings.style.display = 'none';
|
| 50 |
+
} else if (option.textContent === 'Gemini') {
|
| 51 |
+
copilotSettings.style.display = 'none';
|
| 52 |
+
geminiSettings.style.display = 'block';
|
| 53 |
+
} else {
|
| 54 |
+
copilotSettings.style.display = 'none';
|
| 55 |
+
geminiSettings.style.display = 'none';
|
| 56 |
+
}
|
| 57 |
+
}
|
| 58 |
});
|
| 59 |
});
|
| 60 |
|
|
|
|
| 66 |
}
|
| 67 |
});
|
| 68 |
});
|
| 69 |
+
|
| 70 |
+
// Load saved Gemini API key from localStorage
|
| 71 |
+
const geminiKeyInput = document.getElementById('gemini_api_key');
|
| 72 |
+
if (geminiKeyInput) {
|
| 73 |
+
const savedKey = localStorage.getItem('gemini_api_key');
|
| 74 |
+
if (savedKey) {
|
| 75 |
+
geminiKeyInput.value = savedKey;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
// Save to localStorage on input change
|
| 79 |
+
geminiKeyInput.addEventListener('input', () => {
|
| 80 |
+
localStorage.setItem('gemini_api_key', geminiKeyInput.value);
|
| 81 |
+
});
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
// Load saved Copilot server URL from localStorage
|
| 85 |
+
const copilotServerInput = document.getElementById('copilot_server');
|
| 86 |
+
if (copilotServerInput) {
|
| 87 |
+
const savedServer = localStorage.getItem('copilot_server');
|
| 88 |
+
if (savedServer) {
|
| 89 |
+
copilotServerInput.value = savedServer;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
copilotServerInput.addEventListener('input', () => {
|
| 93 |
+
localStorage.setItem('copilot_server', copilotServerInput.value);
|
| 94 |
+
});
|
| 95 |
+
}
|
| 96 |
});
|
| 97 |
|
| 98 |
// Handles multiple file upload change event
|
|
|
|
| 152 |
document.getElementById("selected_style").value = getSelectedText("style");
|
| 153 |
document.getElementById("selected_font").value = getSelectedText("font");
|
| 154 |
document.getElementById("selected_ocr").value = getSelectedText("ocr");
|
| 155 |
+
document.getElementById("selected_copilot_model").value = getSelectedText("copilot_model");
|
| 156 |
+
|
| 157 |
+
// Validate Gemini API key if Gemini is selected
|
| 158 |
+
const translator = getSelectedText("translator");
|
| 159 |
+
if (translator === 'Gemini') {
|
| 160 |
+
const apiKey = document.getElementById('gemini_api_key').value;
|
| 161 |
+
if (!apiKey || apiKey.trim() === '') {
|
| 162 |
+
alert('Vui lòng nhập Gemini API Key!');
|
| 163 |
+
return false;
|
| 164 |
+
}
|
| 165 |
+
}
|
| 166 |
|
| 167 |
// Check if files are selected
|
| 168 |
const files = document.getElementById('file-upload').files;
|
templates/index.html
CHANGED
|
@@ -70,6 +70,7 @@
|
|
| 70 |
</div>
|
| 71 |
<div class="options">
|
| 72 |
<span class="option">Gemini</span>
|
|
|
|
| 73 |
<span class="option">Google</span>
|
| 74 |
<span class="option">NLLB</span>
|
| 75 |
<span class="option">Baidu</span>
|
|
@@ -107,9 +108,31 @@
|
|
| 107 |
<span class="icon">▼</span>
|
| 108 |
</div>
|
| 109 |
<div class="options">
|
|
|
|
| 110 |
<span class="option">Animeace</span>
|
| 111 |
<span class="option">Mangat</span>
|
| 112 |
<span class="option">Arial</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
</div>
|
| 114 |
</div>
|
| 115 |
</div>
|
|
@@ -129,6 +152,15 @@
|
|
| 129 |
</div>
|
| 130 |
</div>
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
<!-- Custom Prompt (show when Custom selected) -->
|
| 133 |
<div class="select-wrapper full-width" id="custom-prompt-wrapper" style="display: none;">
|
| 134 |
<label class="translator-label">Custom Prompt</label>
|
|
@@ -136,6 +168,67 @@
|
|
| 136 |
placeholder="Ví dụ: Dịch theo phong cách light novel, giữ nguyên tên nhân vật..." rows="2"></textarea>
|
| 137 |
</div>
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
<!-- File upload -->
|
| 140 |
<input id="file-upload" type="file" name="files" accept=".jpg, .jpeg, .png" multiple required>
|
| 141 |
<label for="file-upload" class="file" id="file-label">
|
|
@@ -149,13 +242,68 @@
|
|
| 149 |
<input type="hidden" id="selected_style" name="selected_style">
|
| 150 |
<input type="hidden" id="selected_font" name="selected_font">
|
| 151 |
<input type="hidden" id="selected_ocr" name="selected_ocr">
|
|
|
|
| 152 |
<button type="submit">Translate</button>
|
| 153 |
</form>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
<img id="loading-img" src="{{ url_for('static', filename='img/loading.gif') }}" alt="">
|
| 155 |
<p id="loading-p">Đang xử lý... Vui lòng đợi!</p>
|
| 156 |
</div>
|
| 157 |
|
|
|
|
|
|
|
| 158 |
<script src="{{ url_for('static', filename='js/app.js') }}"></script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
</body>
|
| 160 |
|
| 161 |
</html>
|
|
|
|
| 70 |
</div>
|
| 71 |
<div class="options">
|
| 72 |
<span class="option">Gemini</span>
|
| 73 |
+
<span class="option">Copilot</span>
|
| 74 |
<span class="option">Google</span>
|
| 75 |
<span class="option">NLLB</span>
|
| 76 |
<span class="option">Baidu</span>
|
|
|
|
| 108 |
<span class="icon">▼</span>
|
| 109 |
</div>
|
| 110 |
<div class="options">
|
| 111 |
+
<span class="option">Auto (Match Original)</span>
|
| 112 |
<span class="option">Animeace</span>
|
| 113 |
<span class="option">Mangat</span>
|
| 114 |
<span class="option">Arial</span>
|
| 115 |
+
<span class="option">Yuki-Arenzi</span>
|
| 116 |
+
<span class="option">Yuki-Burobu</span>
|
| 117 |
+
<span class="option">Yuki-CCMarianChurchlandJournal</span>
|
| 118 |
+
<span class="option">Yuki-CDX Starstreak</span>
|
| 119 |
+
<span class="option">Yuki-CHICKEN Pie</span>
|
| 120 |
+
<span class="option">Yuki-CrashLanding BB</span>
|
| 121 |
+
<span class="option">Yuki-Downhill Dive</span>
|
| 122 |
+
<span class="option">Yuki-Gingerline DEMO Regular</span>
|
| 123 |
+
<span class="option">Yuki-Gorrilaz_Story</span>
|
| 124 |
+
<span class="option">Yuki-KG Only Angel</span>
|
| 125 |
+
<span class="option">Yuki-LF SwandsHand</span>
|
| 126 |
+
<span class="option">Yuki-La Belle Aurore</span>
|
| 127 |
+
<span class="option">Yuki-Little Cupcakes</span>
|
| 128 |
+
<span class="option">Yuki-Nagurigaki Crayon</span>
|
| 129 |
+
<span class="option">Yuki-Ripsnort BB</span>
|
| 130 |
+
<span class="option">Yuki-Roasthink</span>
|
| 131 |
+
<span class="option">Yuki-Screwball</span>
|
| 132 |
+
<span class="option">Yuki-Shark Crash</span>
|
| 133 |
+
<span class="option">Yuki-Skulduggery</span>
|
| 134 |
+
<span class="option">Yuki-Superscratchy</span>
|
| 135 |
+
<span class="option">Yuki-Tea And Oranges Regular</span>
|
| 136 |
</div>
|
| 137 |
</div>
|
| 138 |
</div>
|
|
|
|
| 152 |
</div>
|
| 153 |
</div>
|
| 154 |
|
| 155 |
+
<!-- Context Memory Toggle -->
|
| 156 |
+
<div class="select-wrapper full-width" style="margin-top: 10px;">
|
| 157 |
+
<label class="toggle-container">
|
| 158 |
+
<input type="checkbox" id="context_memory" name="context_memory" checked>
|
| 159 |
+
<span class="toggle-slider"></span>
|
| 160 |
+
<span class="toggle-label">🧠 Context Memory (dùng context từ tất cả ảnh để dịch chính xác hơn)</span>
|
| 161 |
+
</label>
|
| 162 |
+
</div>
|
| 163 |
+
|
| 164 |
<!-- Custom Prompt (show when Custom selected) -->
|
| 165 |
<div class="select-wrapper full-width" id="custom-prompt-wrapper" style="display: none;">
|
| 166 |
<label class="translator-label">Custom Prompt</label>
|
|
|
|
| 168 |
placeholder="Ví dụ: Dịch theo phong cách light novel, giữ nguyên tên nhân vật..." rows="2"></textarea>
|
| 169 |
</div>
|
| 170 |
|
| 171 |
+
<!-- Copilot Settings (show when Copilot selected) -->
|
| 172 |
+
<div id="copilot-settings" style="display: none; width: 100%;">
|
| 173 |
+
<div class="form-grid">
|
| 174 |
+
<div class="select-wrapper">
|
| 175 |
+
<label class="translator-label">Copilot Server URL</label>
|
| 176 |
+
<input type="text" id="copilot_server" name="copilot_server" value="http://localhost:8080"
|
| 177 |
+
placeholder="http://localhost:8080"
|
| 178 |
+
style="width: 100%; padding: 10px 14px; border: 1px solid #ddd; border-radius: 8px; font-size: 14px;">
|
| 179 |
+
</div>
|
| 180 |
+
<div class="select-wrapper">
|
| 181 |
+
<label class="translator-label">Model</label>
|
| 182 |
+
<div class="custom-select" id="copilot_model" tabindex="0">
|
| 183 |
+
<div class="select-box">
|
| 184 |
+
<span class="selected"></span>
|
| 185 |
+
<span class="icon">▼</span>
|
| 186 |
+
</div>
|
| 187 |
+
<div class="options">
|
| 188 |
+
<!-- ⭐ FREE Unlimited Models -->
|
| 189 |
+
<span class="option">gpt-4.1</span>
|
| 190 |
+
<span class="option">gpt-4o</span>
|
| 191 |
+
<span class="option">gpt-5-mini</span>
|
| 192 |
+
<span class="option">grok-code-fast-1</span>
|
| 193 |
+
<span class="option">oswe-vscode-prime</span>
|
| 194 |
+
<!-- Other Models -->
|
| 195 |
+
<span class="option">gpt-5</span>
|
| 196 |
+
<span class="option">gpt-5.1</span>
|
| 197 |
+
<span class="option">gpt-5.1-codex</span>
|
| 198 |
+
<span class="option">gpt-5.1-codex-mini</span>
|
| 199 |
+
<span class="option">gpt-5.1-codex-max</span>
|
| 200 |
+
<span class="option">gpt-5-codex</span>
|
| 201 |
+
<span class="option">gpt-41-copilot</span>
|
| 202 |
+
<span class="option">gpt-4o-mini</span>
|
| 203 |
+
<span class="option">gpt-4o-2024-11-20</span>
|
| 204 |
+
<span class="option">gpt-4</span>
|
| 205 |
+
<span class="option">gpt-4-0125-preview</span>
|
| 206 |
+
<span class="option">gpt-3.5-turbo</span>
|
| 207 |
+
<span class="option">claude-sonnet-4.5</span>
|
| 208 |
+
<span class="option">claude-sonnet-4</span>
|
| 209 |
+
<span class="option">claude-opus-4.5</span>
|
| 210 |
+
<span class="option">claude-haiku-4.5</span>
|
| 211 |
+
<span class="option">gemini-3-pro-preview</span>
|
| 212 |
+
<span class="option">gemini-2.5-pro</span>
|
| 213 |
+
</div>
|
| 214 |
+
</div>
|
| 215 |
+
</div>
|
| 216 |
+
</div>
|
| 217 |
+
</div>
|
| 218 |
+
|
| 219 |
+
<!-- Gemini Settings (show when Gemini selected) -->
|
| 220 |
+
<div id="gemini-settings" style="display: block; width: 100%;">
|
| 221 |
+
<div class="select-wrapper">
|
| 222 |
+
<label class="translator-label">Gemini API Key</label>
|
| 223 |
+
<input type="password" id="gemini_api_key" name="gemini_api_key"
|
| 224 |
+
placeholder="Nhập API key của bạn (lấy từ ai.google.dev)"
|
| 225 |
+
style="width: 100%; padding: 10px 14px; border: 1px solid #ddd; border-radius: 8px; font-size: 14px;">
|
| 226 |
+
<small style="color: #666; font-size: 12px; margin-top: 4px; display: block;">
|
| 227 |
+
🔒 Key được lưu trong trình duyệt của bạn (localStorage)
|
| 228 |
+
</small>
|
| 229 |
+
</div>
|
| 230 |
+
</div>
|
| 231 |
+
|
| 232 |
<!-- File upload -->
|
| 233 |
<input id="file-upload" type="file" name="files" accept=".jpg, .jpeg, .png" multiple required>
|
| 234 |
<label for="file-upload" class="file" id="file-label">
|
|
|
|
| 242 |
<input type="hidden" id="selected_style" name="selected_style">
|
| 243 |
<input type="hidden" id="selected_font" name="selected_font">
|
| 244 |
<input type="hidden" id="selected_ocr" name="selected_ocr">
|
| 245 |
+
<input type="hidden" id="selected_copilot_model" name="selected_copilot_model">
|
| 246 |
<button type="submit">Translate</button>
|
| 247 |
</form>
|
| 248 |
+
|
| 249 |
+
<!-- Progress Bar -->
|
| 250 |
+
<div id="progress-container" style="display: none; margin-top: 20px;">
|
| 251 |
+
<div id="progress-phase" style="font-size: 12px; color: #666; margin-bottom: 5px; text-align: center;"></div>
|
| 252 |
+
<div style="background: #e0e0e0; border-radius: 10px; overflow: hidden; height: 20px;">
|
| 253 |
+
<div id="progress-bar"
|
| 254 |
+
style="height: 100%; background: linear-gradient(90deg, #5E1675, #8e44ad); width: 0%; transition: width 0.3s ease;">
|
| 255 |
+
</div>
|
| 256 |
+
</div>
|
| 257 |
+
<div id="progress-text" style="font-size: 13px; color: #333; margin-top: 8px; text-align: center;"></div>
|
| 258 |
+
</div>
|
| 259 |
+
|
| 260 |
<img id="loading-img" src="{{ url_for('static', filename='img/loading.gif') }}" alt="">
|
| 261 |
<p id="loading-p">Đang xử lý... Vui lòng đợi!</p>
|
| 262 |
</div>
|
| 263 |
|
| 264 |
+
<!-- Socket.IO for real-time progress -->
|
| 265 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.7.2/socket.io.min.js"></script>
|
| 266 |
<script src="{{ url_for('static', filename='js/app.js') }}"></script>
|
| 267 |
+
<script>
|
| 268 |
+
// Real-time progress updates
|
| 269 |
+
document.addEventListener('DOMContentLoaded', function () {
|
| 270 |
+
const socket = io();
|
| 271 |
+
const progressContainer = document.getElementById('progress-container');
|
| 272 |
+
const progressBar = document.getElementById('progress-bar');
|
| 273 |
+
const progressText = document.getElementById('progress-text');
|
| 274 |
+
const progressPhase = document.getElementById('progress-phase');
|
| 275 |
+
|
| 276 |
+
const phaseNames = {
|
| 277 |
+
'detection': '🔍 Phát hiện bubbles',
|
| 278 |
+
'ocr': '📖 OCR nhận dạng text',
|
| 279 |
+
'translation': '🌐 Dịch văn bản',
|
| 280 |
+
'rendering': '✏️ Render text vào ảnh',
|
| 281 |
+
'done': '✅ Hoàn tất'
|
| 282 |
+
};
|
| 283 |
+
|
| 284 |
+
socket.on('progress', function (data) {
|
| 285 |
+
progressContainer.style.display = 'block';
|
| 286 |
+
|
| 287 |
+
const phaseName = phaseNames[data.phase] || data.phase;
|
| 288 |
+
progressPhase.textContent = phaseName;
|
| 289 |
+
progressBar.style.width = data.percent + '%';
|
| 290 |
+
progressText.textContent = data.message;
|
| 291 |
+
|
| 292 |
+
if (data.phase === 'done') {
|
| 293 |
+
progressBar.style.background = 'linear-gradient(90deg, #50C878, #2ecc71)';
|
| 294 |
+
}
|
| 295 |
+
});
|
| 296 |
+
|
| 297 |
+
// Show progress when form submitted
|
| 298 |
+
document.querySelector('form').addEventListener('submit', function () {
|
| 299 |
+
progressContainer.style.display = 'block';
|
| 300 |
+
progressBar.style.width = '0%';
|
| 301 |
+
progressBar.style.background = 'linear-gradient(90deg, #5E1675, #8e44ad)';
|
| 302 |
+
progressText.textContent = 'Khởi tạo...';
|
| 303 |
+
progressPhase.textContent = '⏳ Chuẩn bị';
|
| 304 |
+
});
|
| 305 |
+
});
|
| 306 |
+
</script>
|
| 307 |
</body>
|
| 308 |
|
| 309 |
</html>
|
templates/translate.html
CHANGED
|
@@ -36,10 +36,15 @@
|
|
| 36 |
</div>
|
| 37 |
|
| 38 |
<div class="buttons_image">
|
| 39 |
-
<a href="#" class="green" id="download-
|
| 40 |
<a href="/" class="red">← Quay lại</a>
|
| 41 |
</div>
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
</body>
|
| 44 |
<script>
|
| 45 |
// Download single image
|
|
@@ -55,14 +60,22 @@
|
|
| 55 |
});
|
| 56 |
});
|
| 57 |
|
| 58 |
-
// Download all images
|
| 59 |
-
document.getElementById('download-
|
| 60 |
e.preventDefault();
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
});
|
| 67 |
</script>
|
| 68 |
|
|
|
|
| 36 |
</div>
|
| 37 |
|
| 38 |
<div class="buttons_image">
|
| 39 |
+
<a href="#" class="green" id="download-zip">📦 Download ZIP</a>
|
| 40 |
<a href="/" class="red">← Quay lại</a>
|
| 41 |
</div>
|
| 42 |
|
| 43 |
+
<!-- Hidden form for ZIP download -->
|
| 44 |
+
<form id="zip-form" action="/download-zip" method="POST" style="display: none;">
|
| 45 |
+
<input type="hidden" name="images_data" id="images-data">
|
| 46 |
+
</form>
|
| 47 |
+
|
| 48 |
</body>
|
| 49 |
<script>
|
| 50 |
// Download single image
|
|
|
|
| 60 |
});
|
| 61 |
});
|
| 62 |
|
| 63 |
+
// Download all images as ZIP
|
| 64 |
+
document.getElementById('download-zip').addEventListener('click', (e) => {
|
| 65 |
e.preventDefault();
|
| 66 |
+
|
| 67 |
+
// Collect all images data
|
| 68 |
+
const images = [];
|
| 69 |
+
document.querySelectorAll('.download-btn').forEach(btn => {
|
| 70 |
+
images.push({
|
| 71 |
+
name: btn.getAttribute('data-name'),
|
| 72 |
+
data: btn.getAttribute('data-image')
|
| 73 |
+
});
|
| 74 |
});
|
| 75 |
+
|
| 76 |
+
// Submit form with images data
|
| 77 |
+
document.getElementById('images-data').value = JSON.stringify(images);
|
| 78 |
+
document.getElementById('zip-form').submit();
|
| 79 |
});
|
| 80 |
</script>
|
| 81 |
|
translator/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (345 Bytes). View file
|
|
|
translator/__pycache__/copilot_translator.cpython-311.pyc
ADDED
|
Binary file (16.2 kB). View file
|
|
|
translator/__pycache__/gemini_translator.cpython-311.pyc
CHANGED
|
Binary files a/translator/__pycache__/gemini_translator.cpython-311.pyc and b/translator/__pycache__/gemini_translator.cpython-311.pyc differ
|
|
|
translator/__pycache__/translator.cpython-311.pyc
CHANGED
|
Binary files a/translator/__pycache__/translator.cpython-311.pyc and b/translator/__pycache__/translator.cpython-311.pyc differ
|
|
|
translator/copilot_translator.py
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Copilot API Translator
|
| 3 |
+
Uses copilot-api proxy server (OpenAI-compatible endpoint)
|
| 4 |
+
https://github.com/ericc-ch/copilot-api
|
| 5 |
+
"""
|
| 6 |
+
import requests
|
| 7 |
+
import json
|
| 8 |
+
from typing import List
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class CopilotTranslator:
|
| 12 |
+
"""
|
| 13 |
+
Translator using Copilot API proxy server.
|
| 14 |
+
Communicates via OpenAI-compatible /v1/chat/completions endpoint.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
LANG_NAMES = {
|
| 18 |
+
"ja": "Japanese",
|
| 19 |
+
"zh": "Chinese",
|
| 20 |
+
"ko": "Korean",
|
| 21 |
+
"en": "English",
|
| 22 |
+
"vi": "Vietnamese",
|
| 23 |
+
"th": "Thai",
|
| 24 |
+
"id": "Indonesian",
|
| 25 |
+
"fr": "French",
|
| 26 |
+
"de": "German",
|
| 27 |
+
"es": "Spanish",
|
| 28 |
+
"ru": "Russian"
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
# Available models (from Copilot API)
|
| 32 |
+
MODELS = [
|
| 33 |
+
# GPT-5 Series
|
| 34 |
+
"gpt-5",
|
| 35 |
+
"gpt-5-mini",
|
| 36 |
+
"gpt-5.1",
|
| 37 |
+
"gpt-5.1-codex",
|
| 38 |
+
"gpt-5.1-codex-mini",
|
| 39 |
+
"gpt-5.1-codex-max",
|
| 40 |
+
"gpt-5-codex",
|
| 41 |
+
# GPT-4.1 Series
|
| 42 |
+
"gpt-4.1",
|
| 43 |
+
"gpt-41-copilot",
|
| 44 |
+
# GPT-4o Series
|
| 45 |
+
"gpt-4o",
|
| 46 |
+
"gpt-4o-mini",
|
| 47 |
+
"gpt-4o-2024-11-20",
|
| 48 |
+
# GPT-4 Series
|
| 49 |
+
"gpt-4",
|
| 50 |
+
"gpt-4-0125-preview",
|
| 51 |
+
# GPT-3.5
|
| 52 |
+
"gpt-3.5-turbo",
|
| 53 |
+
# Claude Series
|
| 54 |
+
"claude-sonnet-4.5",
|
| 55 |
+
"claude-sonnet-4",
|
| 56 |
+
"claude-opus-4.5",
|
| 57 |
+
"claude-haiku-4.5",
|
| 58 |
+
# Gemini
|
| 59 |
+
"gemini-3-pro-preview",
|
| 60 |
+
"gemini-2.5-pro",
|
| 61 |
+
# Other
|
| 62 |
+
"grok-code-fast-1",
|
| 63 |
+
]
|
| 64 |
+
|
| 65 |
+
def __init__(self, server_url: str = "http://localhost:8080", model: str = "gpt-4o"):
|
| 66 |
+
"""
|
| 67 |
+
Initialize Copilot translator.
|
| 68 |
+
|
| 69 |
+
Args:
|
| 70 |
+
server_url: Copilot API proxy server URL (e.g., http://localhost:8080)
|
| 71 |
+
model: Model to use (e.g., gpt-4o, claude-3.5-sonnet)
|
| 72 |
+
"""
|
| 73 |
+
self.base_url = server_url.rstrip("/")
|
| 74 |
+
self.model = model
|
| 75 |
+
self.endpoint = f"{self.base_url}/v1/chat/completions"
|
| 76 |
+
|
| 77 |
+
def translate_single(self, text: str, source: str = "ja", target: str = "en") -> str:
|
| 78 |
+
"""Translate a single text string."""
|
| 79 |
+
if not text or not text.strip():
|
| 80 |
+
return text
|
| 81 |
+
|
| 82 |
+
source_name = self.LANG_NAMES.get(source, "Japanese")
|
| 83 |
+
target_name = self.LANG_NAMES.get(target, "English")
|
| 84 |
+
|
| 85 |
+
prompt = f"""You are an expert manga/comic translator. Translate the following {source_name} text to {target_name}.
|
| 86 |
+
|
| 87 |
+
Rules:
|
| 88 |
+
- Translate for SPOKEN dialogue, natural when read aloud
|
| 89 |
+
- Preserve tone, emotion, and personality
|
| 90 |
+
- For Vietnamese: use appropriate pronouns based on context
|
| 91 |
+
- Return ONLY the translated text, nothing else
|
| 92 |
+
|
| 93 |
+
Text: {text}"""
|
| 94 |
+
|
| 95 |
+
try:
|
| 96 |
+
response = requests.post(
|
| 97 |
+
self.endpoint,
|
| 98 |
+
json={
|
| 99 |
+
"model": self.model,
|
| 100 |
+
"messages": [{"role": "user", "content": prompt}],
|
| 101 |
+
"temperature": 0.3,
|
| 102 |
+
},
|
| 103 |
+
timeout=30
|
| 104 |
+
)
|
| 105 |
+
response.raise_for_status()
|
| 106 |
+
result = response.json()
|
| 107 |
+
return result["choices"][0]["message"]["content"].strip()
|
| 108 |
+
except Exception as e:
|
| 109 |
+
print(f"Copilot translation error: {e}")
|
| 110 |
+
return text
|
| 111 |
+
|
| 112 |
+
def translate_batch(self, texts: List[str], source: str = "ja", target: str = "en") -> List[str]:
|
| 113 |
+
"""
|
| 114 |
+
Translate multiple texts in a single API call.
|
| 115 |
+
|
| 116 |
+
Args:
|
| 117 |
+
texts: List of texts to translate
|
| 118 |
+
source: Source language code
|
| 119 |
+
target: Target language code
|
| 120 |
+
|
| 121 |
+
Returns:
|
| 122 |
+
List of translated texts (same order)
|
| 123 |
+
"""
|
| 124 |
+
if not texts:
|
| 125 |
+
return []
|
| 126 |
+
|
| 127 |
+
# Filter empty texts
|
| 128 |
+
indexed_texts = [(i, t) for i, t in enumerate(texts) if t and t.strip()]
|
| 129 |
+
if not indexed_texts:
|
| 130 |
+
return texts
|
| 131 |
+
|
| 132 |
+
texts_to_translate = [t for _, t in indexed_texts]
|
| 133 |
+
|
| 134 |
+
source_name = self.LANG_NAMES.get(source, "Japanese")
|
| 135 |
+
target_name = self.LANG_NAMES.get(target, "English")
|
| 136 |
+
|
| 137 |
+
prompt = f"""You are an expert manga/comic translator. Translate the following {source_name} texts to {target_name}.
|
| 138 |
+
|
| 139 |
+
Rules:
|
| 140 |
+
- These are speech bubble texts from the SAME comic page - maintain consistency
|
| 141 |
+
- Translate for SPOKEN dialogue, natural when read aloud
|
| 142 |
+
- Preserve tone, emotion, and personality
|
| 143 |
+
- For Vietnamese: use appropriate pronouns based on context
|
| 144 |
+
- Keep short lines impactful
|
| 145 |
+
|
| 146 |
+
Input (JSON array of texts):
|
| 147 |
+
{json.dumps(texts_to_translate, ensure_ascii=False)}
|
| 148 |
+
|
| 149 |
+
Return ONLY a JSON array with translated texts in the EXACT same order.
|
| 150 |
+
Example: ["translation 1", "translation 2"]"""
|
| 151 |
+
|
| 152 |
+
try:
|
| 153 |
+
response = requests.post(
|
| 154 |
+
self.endpoint,
|
| 155 |
+
json={
|
| 156 |
+
"model": self.model,
|
| 157 |
+
"messages": [{"role": "user", "content": prompt}],
|
| 158 |
+
"temperature": 0.3,
|
| 159 |
+
},
|
| 160 |
+
timeout=60
|
| 161 |
+
)
|
| 162 |
+
response.raise_for_status()
|
| 163 |
+
result = response.json()
|
| 164 |
+
result_text = result["choices"][0]["message"]["content"].strip()
|
| 165 |
+
|
| 166 |
+
# Clean up response
|
| 167 |
+
if result_text.startswith("```json"):
|
| 168 |
+
result_text = result_text[7:]
|
| 169 |
+
if result_text.startswith("```"):
|
| 170 |
+
result_text = result_text[3:]
|
| 171 |
+
if result_text.endswith("```"):
|
| 172 |
+
result_text = result_text[:-3]
|
| 173 |
+
result_text = result_text.strip()
|
| 174 |
+
|
| 175 |
+
translations = json.loads(result_text)
|
| 176 |
+
|
| 177 |
+
# Validate length
|
| 178 |
+
if len(translations) != len(texts_to_translate):
|
| 179 |
+
print(f"Warning: Expected {len(texts_to_translate)} translations, got {len(translations)}")
|
| 180 |
+
# Pad or truncate
|
| 181 |
+
while len(translations) < len(texts_to_translate):
|
| 182 |
+
translations.append(texts_to_translate[len(translations)])
|
| 183 |
+
translations = translations[:len(texts_to_translate)]
|
| 184 |
+
|
| 185 |
+
# Rebuild full list
|
| 186 |
+
result_list = list(texts)
|
| 187 |
+
for (orig_idx, _), trans in zip(indexed_texts, translations):
|
| 188 |
+
result_list[orig_idx] = trans
|
| 189 |
+
|
| 190 |
+
return result_list
|
| 191 |
+
|
| 192 |
+
except Exception as e:
|
| 193 |
+
print(f"Copilot batch translation error: {e}")
|
| 194 |
+
# Fallback to single translations
|
| 195 |
+
return [self.translate_single(t, source, target) for t in texts]
|
| 196 |
+
|
| 197 |
+
def translate_pages_batch(
|
| 198 |
+
self,
|
| 199 |
+
pages_texts: dict,
|
| 200 |
+
source: str = "ja",
|
| 201 |
+
target: str = "en",
|
| 202 |
+
context: dict = None
|
| 203 |
+
) -> dict:
|
| 204 |
+
"""
|
| 205 |
+
Translate texts from multiple pages in a single API call.
|
| 206 |
+
Ideal for batch processing 10+ manga pages at once.
|
| 207 |
+
|
| 208 |
+
Args:
|
| 209 |
+
pages_texts: Dict mapping page names to list of texts
|
| 210 |
+
e.g., {"page1": ["text1", "text2"], "page2": ["text3"]}
|
| 211 |
+
source: Source language code
|
| 212 |
+
target: Target language code
|
| 213 |
+
context: Optional dict of ALL page texts for context (helps maintain consistency)
|
| 214 |
+
|
| 215 |
+
Returns:
|
| 216 |
+
Dict with same structure but translated texts
|
| 217 |
+
"""
|
| 218 |
+
if not pages_texts:
|
| 219 |
+
return {}
|
| 220 |
+
|
| 221 |
+
source_name = self.LANG_NAMES.get(source, "Japanese")
|
| 222 |
+
target_name = self.LANG_NAMES.get(target, "English")
|
| 223 |
+
|
| 224 |
+
# Build context section if context is provided
|
| 225 |
+
context_section = ""
|
| 226 |
+
if context and context != pages_texts:
|
| 227 |
+
# Show summary of other pages for context
|
| 228 |
+
other_pages = {k: v for k, v in context.items() if k not in pages_texts}
|
| 229 |
+
if other_pages:
|
| 230 |
+
context_preview = []
|
| 231 |
+
for page, texts in list(other_pages.items())[:5]: # First 5 pages for context
|
| 232 |
+
context_preview.append(f"{page}: {' | '.join(texts[:3])}...")
|
| 233 |
+
context_section = f"""
|
| 234 |
+
STORY CONTEXT (from other pages in this batch - use for character/tone consistency):
|
| 235 |
+
{chr(10).join(context_preview)}
|
| 236 |
+
---
|
| 237 |
+
"""
|
| 238 |
+
|
| 239 |
+
prompt = f"""You are an expert manga/comic translator. Translate the following {source_name} texts to {target_name}.
|
| 240 |
+
{context_section}
|
| 241 |
+
Context: These are SEQUENTIAL comic pages telling a continuous story. Maintain narrative flow and character voice consistency across all pages.
|
| 242 |
+
|
| 243 |
+
Rules:
|
| 244 |
+
- Translate for SPOKEN dialogue - it must sound natural when read aloud
|
| 245 |
+
- Each character should have a consistent voice/speaking style across pages
|
| 246 |
+
- Preserve tone, emotion, and personality through careful word choice
|
| 247 |
+
- For Vietnamese: Choose appropriate pronouns based on character relationships
|
| 248 |
+
- Keep short lines impactful. Don't pad or over-explain.
|
| 249 |
+
|
| 250 |
+
Input (JSON - sequential pages with their speech bubbles):
|
| 251 |
+
{json.dumps(pages_texts, ensure_ascii=False, indent=2)}
|
| 252 |
+
|
| 253 |
+
IMPORTANT: Return ONLY a valid JSON object with the exact same structure but with translated texts.
|
| 254 |
+
Keep page names and bubble order exactly the same. No explanations or markdown."""
|
| 255 |
+
|
| 256 |
+
try:
|
| 257 |
+
response = requests.post(
|
| 258 |
+
self.endpoint,
|
| 259 |
+
json={
|
| 260 |
+
"model": self.model,
|
| 261 |
+
"messages": [{"role": "user", "content": prompt}],
|
| 262 |
+
"temperature": 0.3,
|
| 263 |
+
},
|
| 264 |
+
timeout=120 # Longer timeout for multi-page batch
|
| 265 |
+
)
|
| 266 |
+
response.raise_for_status()
|
| 267 |
+
result = response.json()
|
| 268 |
+
result_text = result["choices"][0]["message"]["content"].strip()
|
| 269 |
+
|
| 270 |
+
# Clean up response
|
| 271 |
+
if result_text.startswith("```json"):
|
| 272 |
+
result_text = result_text[7:]
|
| 273 |
+
if result_text.startswith("```"):
|
| 274 |
+
result_text = result_text[3:]
|
| 275 |
+
if result_text.endswith("```"):
|
| 276 |
+
result_text = result_text[:-3]
|
| 277 |
+
result_text = result_text.strip()
|
| 278 |
+
|
| 279 |
+
translated = json.loads(result_text)
|
| 280 |
+
print(f"✓ Translated {len(pages_texts)} pages in single batch")
|
| 281 |
+
return translated
|
| 282 |
+
|
| 283 |
+
except Exception as e:
|
| 284 |
+
print(f"Copilot pages batch translation error: {e}")
|
| 285 |
+
# Fallback: translate each page separately
|
| 286 |
+
result = {}
|
| 287 |
+
for page_name, texts in pages_texts.items():
|
| 288 |
+
result[page_name] = self.translate_batch(texts, source, target)
|
| 289 |
+
return result
|
| 290 |
+
|
| 291 |
+
def test_connection(self) -> bool:
|
| 292 |
+
"""Test if the server is reachable."""
|
| 293 |
+
try:
|
| 294 |
+
response = requests.get(f"{self.base_url}/v1/models", timeout=5)
|
| 295 |
+
return response.status_code == 200
|
| 296 |
+
except:
|
| 297 |
+
return False
|
| 298 |
+
|
| 299 |
+
def get_available_models(self) -> List[str]:
|
| 300 |
+
"""Get list of available models from server."""
|
| 301 |
+
try:
|
| 302 |
+
response = requests.get(f"{self.base_url}/v1/models", timeout=5)
|
| 303 |
+
if response.status_code == 200:
|
| 304 |
+
data = response.json()
|
| 305 |
+
return [m["id"] for m in data.get("data", [])]
|
| 306 |
+
except:
|
| 307 |
+
pass
|
| 308 |
+
return self.MODELS # Return default list
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def translate_manga_pages_batch(
|
| 312 |
+
pages_texts: dict,
|
| 313 |
+
server_url: str = "http://localhost:8080",
|
| 314 |
+
model: str = "gpt-4o",
|
| 315 |
+
source_lang: str = "ja",
|
| 316 |
+
target_lang: str = "en",
|
| 317 |
+
batch_size: int = 10
|
| 318 |
+
) -> dict:
|
| 319 |
+
"""
|
| 320 |
+
Translate manga pages in batches.
|
| 321 |
+
|
| 322 |
+
Args:
|
| 323 |
+
pages_texts: All pages' texts {page_name: [texts]}
|
| 324 |
+
server_url: Copilot API server URL
|
| 325 |
+
model: Model to use
|
| 326 |
+
source_lang: Source language code
|
| 327 |
+
target_lang: Target language code
|
| 328 |
+
batch_size: Number of pages per API call (default: 10)
|
| 329 |
+
|
| 330 |
+
Returns:
|
| 331 |
+
All translated texts
|
| 332 |
+
"""
|
| 333 |
+
translator = CopilotTranslator(server_url=server_url, model=model)
|
| 334 |
+
|
| 335 |
+
page_names = list(pages_texts.keys())
|
| 336 |
+
all_results = {}
|
| 337 |
+
|
| 338 |
+
# Process in batches
|
| 339 |
+
for i in range(0, len(page_names), batch_size):
|
| 340 |
+
batch_pages = page_names[i:i + batch_size]
|
| 341 |
+
batch_texts = {name: pages_texts[name] for name in batch_pages}
|
| 342 |
+
|
| 343 |
+
print(f"Translating pages {i+1} to {min(i+batch_size, len(page_names))}...")
|
| 344 |
+
batch_results = translator.translate_pages_batch(
|
| 345 |
+
batch_texts,
|
| 346 |
+
source=source_lang,
|
| 347 |
+
target=target_lang
|
| 348 |
+
)
|
| 349 |
+
all_results.update(batch_results)
|
| 350 |
+
|
| 351 |
+
return all_results
|
translator/gemini_translator.py
CHANGED
|
@@ -6,8 +6,13 @@ Supports multiple source languages and custom prompts
|
|
| 6 |
import google.generativeai as genai
|
| 7 |
import json
|
| 8 |
import os
|
|
|
|
| 9 |
from typing import List, Dict, Optional
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
class GeminiTranslator:
|
| 13 |
"""
|
|
@@ -32,13 +37,13 @@ class GeminiTranslator:
|
|
| 32 |
# Preset style templates
|
| 33 |
STYLE_PRESETS = {
|
| 34 |
"default": "",
|
| 35 |
-
"formal": "Use formal language
|
| 36 |
-
"casual": "Use casual,
|
| 37 |
-
"keep_honorifics": "Keep Japanese honorifics like -san, -kun, -chan, -sama, senpai, sensei.",
|
| 38 |
-
"localize": "Fully localize
|
| 39 |
-
"literal": "Translate
|
| 40 |
-
"web_novel": "Use web novel
|
| 41 |
-
"action": "Use
|
| 42 |
}
|
| 43 |
|
| 44 |
def __init__(self, api_key: str = None, custom_prompt: str = None, style: str = "default"):
|
|
@@ -97,11 +102,19 @@ class GeminiTranslator:
|
|
| 97 |
style = custom_prompt or self.custom_prompt
|
| 98 |
style_text = f"\nStyle: {style}" if style else ""
|
| 99 |
|
| 100 |
-
prompt = f"""
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
|
| 106 |
try:
|
| 107 |
response = self.model.generate_content(prompt)
|
|
@@ -118,7 +131,7 @@ Text: {text}"""
|
|
| 118 |
custom_prompt: str = None
|
| 119 |
) -> List[str]:
|
| 120 |
"""
|
| 121 |
-
Translate multiple texts in a single API call.
|
| 122 |
|
| 123 |
Args:
|
| 124 |
texts: List of texts to translate
|
|
@@ -138,55 +151,101 @@ Text: {text}"""
|
|
| 138 |
if not indexed_texts:
|
| 139 |
return texts
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
source_name = self.LANG_NAMES.get(source, "Japanese")
|
| 142 |
target_name = self.LANG_NAMES.get(target, "English")
|
| 143 |
-
texts_to_translate = [t for _, t in indexed_texts]
|
| 144 |
|
| 145 |
style = custom_prompt or self.custom_prompt
|
| 146 |
style_text = f"\nStyle instructions: {style}" if style else ""
|
| 147 |
|
| 148 |
-
prompt = f"""You are
|
| 149 |
-
Keep translations natural and suitable for comic speech bubbles.{style_text}
|
| 150 |
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
{json.dumps(texts_to_translate, ensure_ascii=False)}
|
| 153 |
|
| 154 |
-
IMPORTANT: Return ONLY a JSON array with translated texts in the same order.
|
| 155 |
-
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
if result_text.startswith("```json"):
|
| 163 |
-
result_text = result_text[7:]
|
| 164 |
-
if result_text.startswith("```"):
|
| 165 |
-
result_text = result_text[3:]
|
| 166 |
-
if result_text.endswith("```"):
|
| 167 |
-
result_text = result_text[:-3]
|
| 168 |
-
result_text = result_text.strip()
|
| 169 |
-
|
| 170 |
-
translations = json.loads(result_text)
|
| 171 |
-
|
| 172 |
-
# Rebuild full list with original empty strings preserved
|
| 173 |
-
result = list(texts)
|
| 174 |
-
for (orig_idx, _), trans in zip(indexed_texts, translations):
|
| 175 |
-
result[orig_idx] = trans
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
def translate_pages_batch(
|
| 185 |
self,
|
| 186 |
pages_texts: Dict[str, List[str]],
|
| 187 |
source: str = "ja",
|
| 188 |
target: str = "en",
|
| 189 |
-
custom_prompt: str = None
|
|
|
|
| 190 |
) -> Dict[str, List[str]]:
|
| 191 |
"""
|
| 192 |
Translate texts from multiple pages in a single API call.
|
|
@@ -197,6 +256,7 @@ Example output format: ["translated text 1", "translated text 2", ...]"""
|
|
| 197 |
source: Source language code
|
| 198 |
target: Target language code
|
| 199 |
custom_prompt: Override custom prompt for this call
|
|
|
|
| 200 |
|
| 201 |
Returns:
|
| 202 |
Dict with same structure but translated texts
|
|
@@ -210,15 +270,41 @@ Example output format: ["translated text 1", "translated text 2", ...]"""
|
|
| 210 |
style = custom_prompt or self.custom_prompt
|
| 211 |
style_text = f"\nStyle instructions: {style}" if style else ""
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
-
Input (JSON -
|
| 218 |
{json.dumps(pages_texts, ensure_ascii=False, indent=2)}
|
| 219 |
|
| 220 |
-
IMPORTANT: Return ONLY a JSON object with the exact same structure but with translated texts.
|
| 221 |
-
Keep
|
| 222 |
|
| 223 |
try:
|
| 224 |
response = self.model.generate_content(prompt)
|
|
|
|
| 6 |
import google.generativeai as genai
|
| 7 |
import json
|
| 8 |
import os
|
| 9 |
+
import time
|
| 10 |
from typing import List, Dict, Optional
|
| 11 |
|
| 12 |
+
# Constants for retry logic
|
| 13 |
+
MAX_RETRIES = 3
|
| 14 |
+
RETRY_DELAY_BASE = 0.5 # Faster recovery: 0.5s → 1s → 2s
|
| 15 |
+
|
| 16 |
|
| 17 |
class GeminiTranslator:
|
| 18 |
"""
|
|
|
|
| 37 |
# Preset style templates
|
| 38 |
STYLE_PRESETS = {
|
| 39 |
"default": "",
|
| 40 |
+
"formal": "Use formal, polite language. Use respectful pronouns and expressions.",
|
| 41 |
+
"casual": "Use casual, natural everyday language. Like friends talking to each other.",
|
| 42 |
+
"keep_honorifics": "Keep Japanese honorifics like -san, -kun, -chan, -sama, senpai, sensei untranslated.",
|
| 43 |
+
"localize": "Fully localize cultural references. Adapt idioms and expressions to feel native.",
|
| 44 |
+
"literal": "Translate meaning accurately but ensure it still sounds natural when spoken.",
|
| 45 |
+
"web_novel": "Use dramatic web novel style with impactful expressions and emotional weight.",
|
| 46 |
+
"action": "Use short, punchy sentences. Quick pace. Impactful dialogue.",
|
| 47 |
}
|
| 48 |
|
| 49 |
def __init__(self, api_key: str = None, custom_prompt: str = None, style: str = "default"):
|
|
|
|
| 102 |
style = custom_prompt or self.custom_prompt
|
| 103 |
style_text = f"\nStyle: {style}" if style else ""
|
| 104 |
|
| 105 |
+
prompt = f"""You are an expert manga/comic translator specializing in {source_name} to {target_name} translation.
|
| 106 |
+
|
| 107 |
+
Translation Guidelines:
|
| 108 |
+
- Translate for SPOKEN dialogue, not written text. It should sound natural when read aloud.
|
| 109 |
+
- Preserve the character's tone, emotion, and personality through word choice.
|
| 110 |
+
- Use natural sentence structures in {target_name}. Avoid awkward literal translations.
|
| 111 |
+
- For Vietnamese: Use appropriate pronouns (tao/mày for close friends, tôi/anh/em for normal, etc.) based on context.
|
| 112 |
+
- Keep exclamations and emotional expressions feeling authentic.
|
| 113 |
+
- Maintain the impact and rhythm of short/punchy lines.{style_text}
|
| 114 |
+
|
| 115 |
+
IMPORTANT: Return ONLY the translated text. No explanations, no quotes, no formatting.
|
| 116 |
|
| 117 |
+
Original text: {text}"""
|
| 118 |
|
| 119 |
try:
|
| 120 |
response = self.model.generate_content(prompt)
|
|
|
|
| 131 |
custom_prompt: str = None
|
| 132 |
) -> List[str]:
|
| 133 |
"""
|
| 134 |
+
Translate multiple texts in a single API call with retry logic.
|
| 135 |
|
| 136 |
Args:
|
| 137 |
texts: List of texts to translate
|
|
|
|
| 151 |
if not indexed_texts:
|
| 152 |
return texts
|
| 153 |
|
| 154 |
+
texts_to_translate = [t for _, t in indexed_texts]
|
| 155 |
+
translations = self._translate_batch_internal(texts_to_translate, source, target, custom_prompt)
|
| 156 |
+
|
| 157 |
+
# Rebuild full list with original empty strings preserved
|
| 158 |
+
result = list(texts)
|
| 159 |
+
for (orig_idx, _), trans in zip(indexed_texts, translations):
|
| 160 |
+
result[orig_idx] = trans
|
| 161 |
+
|
| 162 |
+
return result
|
| 163 |
+
|
| 164 |
+
def _translate_batch_internal(
|
| 165 |
+
self,
|
| 166 |
+
texts_to_translate: List[str],
|
| 167 |
+
source: str,
|
| 168 |
+
target: str,
|
| 169 |
+
custom_prompt: str = None
|
| 170 |
+
) -> List[str]:
|
| 171 |
+
"""Internal method to translate a single chunk with retry logic."""
|
| 172 |
source_name = self.LANG_NAMES.get(source, "Japanese")
|
| 173 |
target_name = self.LANG_NAMES.get(target, "English")
|
|
|
|
| 174 |
|
| 175 |
style = custom_prompt or self.custom_prompt
|
| 176 |
style_text = f"\nStyle instructions: {style}" if style else ""
|
| 177 |
|
| 178 |
+
prompt = f"""You are an expert manga/comic translator with years of experience in {source_name} to {target_name} translation.
|
|
|
|
| 179 |
|
| 180 |
+
Translation Guidelines:
|
| 181 |
+
- These are speech bubble texts from the SAME comic page - maintain consistency in character voices.
|
| 182 |
+
- Translate for SPOKEN dialogue. It must sound natural when read aloud, not stiff or robotic.
|
| 183 |
+
- Preserve each character's tone, emotion, and personality through appropriate word choice.
|
| 184 |
+
- Use natural {target_name} sentence structures. AVOID awkward literal word-for-word translations.
|
| 185 |
+
- For Vietnamese specifically:
|
| 186 |
+
+ Use appropriate pronouns based on relationship (tao/mày, tôi/cậu, anh/em, etc.)
|
| 187 |
+
+ Translate exclamations naturally (くそ → Chết tiệt, やばい → Chết rồi, etc.)
|
| 188 |
+
+ Keep dialogue feeling authentic to how Vietnamese people actually speak
|
| 189 |
+
- Maintain the impact of short/punchy lines. Don't over-explain.
|
| 190 |
+
- Keep emotional expressions and interjections feeling authentic.{style_text}
|
| 191 |
+
|
| 192 |
+
Input texts (JSON array - each is a separate speech bubble):
|
| 193 |
{json.dumps(texts_to_translate, ensure_ascii=False)}
|
| 194 |
|
| 195 |
+
IMPORTANT: Return ONLY a valid JSON array with translated texts in the EXACT same order.
|
| 196 |
+
Format: ["translation 1", "translation 2", ...]"""
|
| 197 |
|
| 198 |
+
# Retry with exponential backoff
|
| 199 |
+
for attempt in range(MAX_RETRIES):
|
| 200 |
+
try:
|
| 201 |
+
response = self.model.generate_content(prompt)
|
| 202 |
+
result_text = response.text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
# Clean up response if needed
|
| 205 |
+
if result_text.startswith("```json"):
|
| 206 |
+
result_text = result_text[7:]
|
| 207 |
+
if result_text.startswith("```"):
|
| 208 |
+
result_text = result_text[3:]
|
| 209 |
+
if result_text.endswith("```"):
|
| 210 |
+
result_text = result_text[:-3]
|
| 211 |
+
result_text = result_text.strip()
|
| 212 |
+
|
| 213 |
+
translations = json.loads(result_text)
|
| 214 |
+
|
| 215 |
+
# Validate response length
|
| 216 |
+
if len(translations) != len(texts_to_translate):
|
| 217 |
+
raise ValueError(f"Expected {len(texts_to_translate)} translations, got {len(translations)}")
|
| 218 |
+
|
| 219 |
+
return translations
|
| 220 |
+
|
| 221 |
+
except Exception as e:
|
| 222 |
+
error_str = str(e)
|
| 223 |
+
print(f"Gemini batch attempt {attempt + 1}/{MAX_RETRIES} failed: {e}")
|
| 224 |
+
|
| 225 |
+
# Check if it's a quota error - don't retry or fallback
|
| 226 |
+
if "429" in error_str or "quota" in error_str.lower():
|
| 227 |
+
print("⚠️ Quota exceeded! Returning original texts to avoid more API calls.")
|
| 228 |
+
print(" Wait 1 minute or upgrade your Gemini API plan.")
|
| 229 |
+
return texts_to_translate # Return original texts
|
| 230 |
+
|
| 231 |
+
if attempt < MAX_RETRIES - 1:
|
| 232 |
+
delay = RETRY_DELAY_BASE * (2 ** attempt)
|
| 233 |
+
print(f"Retrying in {delay}s...")
|
| 234 |
+
time.sleep(delay)
|
| 235 |
+
else:
|
| 236 |
+
# Only fallback to single translations if NOT quota error
|
| 237 |
+
print("All retries failed, falling back to single translations")
|
| 238 |
+
return [self.translate_single(t, source, target) for t in texts_to_translate]
|
| 239 |
+
|
| 240 |
+
return texts_to_translate # Fallback: return original
|
| 241 |
|
| 242 |
def translate_pages_batch(
|
| 243 |
self,
|
| 244 |
pages_texts: Dict[str, List[str]],
|
| 245 |
source: str = "ja",
|
| 246 |
target: str = "en",
|
| 247 |
+
custom_prompt: str = None,
|
| 248 |
+
context: Dict[str, List[str]] = None
|
| 249 |
) -> Dict[str, List[str]]:
|
| 250 |
"""
|
| 251 |
Translate texts from multiple pages in a single API call.
|
|
|
|
| 256 |
source: Source language code
|
| 257 |
target: Target language code
|
| 258 |
custom_prompt: Override custom prompt for this call
|
| 259 |
+
context: Optional dict of ALL page texts for context (helps maintain consistency)
|
| 260 |
|
| 261 |
Returns:
|
| 262 |
Dict with same structure but translated texts
|
|
|
|
| 270 |
style = custom_prompt or self.custom_prompt
|
| 271 |
style_text = f"\nStyle instructions: {style}" if style else ""
|
| 272 |
|
| 273 |
+
# Build context section if context is provided
|
| 274 |
+
context_section = ""
|
| 275 |
+
if context and context != pages_texts:
|
| 276 |
+
other_pages = {k: v for k, v in context.items() if k not in pages_texts}
|
| 277 |
+
if other_pages:
|
| 278 |
+
context_preview = []
|
| 279 |
+
for page, texts in list(other_pages.items())[:5]:
|
| 280 |
+
context_preview.append(f"{page}: {' | '.join(texts[:3])}...")
|
| 281 |
+
context_section = f"""
|
| 282 |
+
STORY CONTEXT (from other pages - use for character/tone consistency):
|
| 283 |
+
{chr(10).join(context_preview)}
|
| 284 |
+
---
|
| 285 |
+
"""
|
| 286 |
+
|
| 287 |
+
prompt = f"""You are an expert manga/comic translator with deep understanding of {source_name} to {target_name} translation.
|
| 288 |
+
{context_section}
|
| 289 |
+
Context: These are SEQUENTIAL comic pages telling a continuous story. Maintain narrative flow and character voice consistency across all pages.
|
| 290 |
+
|
| 291 |
+
Translation Guidelines:
|
| 292 |
+
- Translate for SPOKEN dialogue - it must sound natural when read aloud.
|
| 293 |
+
- Each character should have a consistent voice/speaking style across pages.
|
| 294 |
+
- Preserve tone, emotion, and personality through careful word choice.
|
| 295 |
+
- Use natural {target_name} sentence structures. NEVER translate word-for-word literally.
|
| 296 |
+
- For Vietnamese:
|
| 297 |
+
+ Choose appropriate pronouns based on character relationships and social context
|
| 298 |
+
+ Translate interjections and exclamations to feel authentic (not literal)
|
| 299 |
+
+ Use natural Vietnamese speech patterns, not textbook Vietnamese
|
| 300 |
+
- Keep short lines impactful. Don't pad or over-explain.
|
| 301 |
+
- Sound effects and onomatopoeia: translate the meaning/feeling, not literally.{style_text}
|
| 302 |
|
| 303 |
+
Input (JSON - sequential pages with their speech bubbles):
|
| 304 |
{json.dumps(pages_texts, ensure_ascii=False, indent=2)}
|
| 305 |
|
| 306 |
+
IMPORTANT: Return ONLY a valid JSON object with the exact same structure but with translated texts.
|
| 307 |
+
Keep page names and bubble order exactly the same. No explanations or markdown."""
|
| 308 |
|
| 309 |
try:
|
| 310 |
response = self.model.generate_content(prompt)
|
translator/translator.py
CHANGED
|
@@ -150,7 +150,9 @@ class MangaTranslator:
|
|
| 150 |
try:
|
| 151 |
if self._gemini_translator is None:
|
| 152 |
from .gemini_translator import GeminiTranslator
|
| 153 |
-
api_key = self
|
|
|
|
|
|
|
| 154 |
custom_prompt = getattr(self, '_gemini_custom_prompt', None)
|
| 155 |
self._gemini_translator = GeminiTranslator(
|
| 156 |
api_key=api_key,
|
|
|
|
| 150 |
try:
|
| 151 |
if self._gemini_translator is None:
|
| 152 |
from .gemini_translator import GeminiTranslator
|
| 153 |
+
api_key = getattr(self, '_gemini_api_key', None) or self.gemini_api_key
|
| 154 |
+
if not api_key:
|
| 155 |
+
raise ValueError("Gemini API key required. Please enter it in the web form.")
|
| 156 |
custom_prompt = getattr(self, '_gemini_custom_prompt', None)
|
| 157 |
self._gemini_translator = GeminiTranslator(
|
| 158 |
api_key=api_key,
|