Spaces:

ebhon
/

MangaFlow

Sleeping

App Files Files Community

ebhon committed on Apr 25, 2025

Commit

5ff95a7

verified ·

1 Parent(s): 9d96218

Upload 9 files

Browse files

Files changed (9) hide show

.streamlit/config.toml +10 -0
app.py +251 -0
manga_translator/__init__.py +3 -0
manga_translator/__pycache__/__init__.cpython-312.pyc +0 -0
manga_translator/__pycache__/translator.cpython-312.pyc +0 -0
manga_translator/font/CC Wild Words Bold Italic.ttf +0 -0
manga_translator/font/CC Wild Words Italic.ttf +0 -0
manga_translator/font/CC Wild Words Roman.ttf +0 -0
manga_translator/translator.py +876 -0

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,10 @@

+[theme]
+primaryColor = "#FF4B4B"
+backgroundColor = "#FFFFFF"
+secondaryBackgroundColor = "#F0F2F6"
+textColor = "#262730"
+font = "sans serif"
+[server]
+enableCORS = false
+enableXsrfProtection = false

app.py ADDED Viewed

	@@ -0,0 +1,251 @@

+import streamlit as st
+import os
+from PIL import Image
+import torch
+from manga_translator.translator import MangaTextDetector, process_manga_pages
+import tempfile
+import cv2
+import io
+# Initialize session state
+# Both keys are created only once per browser session: Streamlit re-runs this
+# script on every interaction, and st.session_state survives those re-runs.
+# 'processed_results' is filled by process_image(), keyed by upload name;
+# 'temp_dir' is a scratch directory created with tempfile.mkdtemp().
+if 'processed_results' not in st.session_state:
+    st.session_state.processed_results = {}
+if 'temp_dir' not in st.session_state:
+    st.session_state.temp_dir = tempfile.mkdtemp()
+# Set page config for wider layout and title
+st.set_page_config(
+    page_title="Manga Translator",
+    page_icon="📚",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS to improve the appearance
+# The stylesheet is injected as raw HTML (unsafe_allow_html=True). It pins the
+# sidebar to a fixed 250px column and pads the main container to match.
+# NOTE(review): the .css-17lntkn / .css-pkbazv selectors look like generated
+# Streamlit class names and may not match other Streamlit versions - confirm.
+st.markdown("""
+    <style>
+        /* Reset container styles */
+        .block-container {
+            padding: 2rem 1rem !important;
+            max-width: none;
+        }
+        /* Main content area */
+        .main .block-container {
+            padding-left: calc(250px + 1rem) !important;
+        }
+        /* Sidebar styling */
+        section[data-testid="stSidebar"] {
+            width: 250px !important;
+            background-color: rgb(240, 242, 246) !important;
+            position: fixed !important;
+            left: 0 !important;
+            top: 0 !important;
+            height: 100vh !important;
+        }
+        /* Title styling */
+        .stTitle {
+            font-size: 3rem !important;
+            font-weight: 700 !important;
+            color: #1E1E1E !important;
+            padding-bottom: 2rem !important;
+        }
+        /* Subheader styling */
+        .stSubheader {
+            font-size: 1.5rem !important;
+            color: #4F4F4F !important;
+            padding-bottom: 1rem !important;
+        }
+        /* Upload section styling */
+        .uploadSection {
+            background-color: white;
+            padding: 2rem;
+            border-radius: 10px;
+            margin: 1rem 0;
+            border: 1px solid rgb(224, 224, 224);
+        }
+        /* Hide default menu text */
+        .css-17lntkn {
+            display: none;
+        }
+        .css-pkbazv {
+            display: none;
+        }
+    </style>
+""", unsafe_allow_html=True)
+# Main title and description
+st.title("🎯 Manga Translator")
+st.write("This app uses custom YOLO detection, OCR, and machine translation to automatically translate manga pages from Japanese to English.")
+# Add warning note
+st.warning("**Note:** Translation accuracy may vary due to the complexity of Japanese text and manga-specific expressions. We're continuously working to improve the system!")
+# Guidelines section
+# Static help text; each numbered heading is followed by its bullet list.
+st.markdown("📋 **Guidelines for Best Results**")
+st.markdown("1. **Image Requirements:**")
+st.markdown("""
+   - Clear, high-resolution manga page
+   - Japanese text should be clearly visible
+   - Text bubbles should not be cropped
+   - Supported formats: PNG, JPG, JPEG
+""")
+st.markdown("2. **For Best Results:**")
+st.markdown("""
+   - Avoid pages with handwritten text
+   - Ensure text bubbles are not overlapping
+   - Image should be properly oriented
+   - Avoid heavily compressed images
+""")
+st.markdown("3. **Privacy & Copyright:**")
+st.markdown("""
+   - Only upload content you have rights to use
+   - We don't store any uploaded images
+   - All processing is done in real-time
+""")
+# How it works section
+st.markdown("🔍 **How It Works**")
+st.markdown("""
+1. **Text Detection:** Custom YOLO model detects text bubbles
+2. **OCR Processing:** Extracts Japanese text
+3. **Translation:** Converts to English using DeepL API
+4. **Text Insertion:** Places translated text back into the image
+""")
+def process_image(uploaded_file):
+    """Process image and store results in session state."""
+    if uploaded_file.name not in st.session_state.processed_results:
+        try:
+            detector = MangaTextDetector('best.pt')
+            # Save temporary file
+            temp_path = os.path.join(st.session_state.temp_dir, uploaded_file.name)
+            with open(temp_path, "wb") as f:
+                f.write(uploaded_file.getbuffer())
+            # Process image
+            image, detections, result_image, processed_regions, translated_image = detector.process_image(temp_path)
+            # Store all results in session state
+            st.session_state.processed_results[uploaded_file.name] = {
+                'image': image,
+                'detections': detections,
+                'result_image': result_image,
+                'processed_regions': processed_regions,
+                'translated_image': translated_image
+            }
+            return True
+        except Exception as e:
+            st.error(f"❌ Error: {str(e)}")
+            return False
+    return True
+# File uploader section
+with st.container():
+    st.markdown('<div class="uploadSection">', unsafe_allow_html=True)
+    uploaded_files = st.file_uploader(
+        "Choose manga pages",
+        type=['png', 'jpg', 'jpeg'],
+        accept_multiple_files=True,
+        help="Drag and drop your manga images here. Supported formats: PNG, JPG, JPEG"
+    )
+    st.markdown('</div>', unsafe_allow_html=True)
+if uploaded_files:
+    # Create temporary directory for processing
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Save uploaded files to temp directory
+        for uploaded_file in uploaded_files:
+            file_path = os.path.join(temp_dir, uploaded_file.name)
+            with open(file_path, "wb") as f:
+                f.write(uploaded_file.getbuffer())
+        # Process the manga pages
+        with st.spinner("Processing your manga pages..."):
+            detector = MangaTextDetector('best.pt')
+            # Process each file
+            for uploaded_file in uploaded_files:
+                st.subheader(f"Processing: {uploaded_file.name}")
+                # Create columns for display
+                col1, col2 = st.columns(2)
+                # Load and display original image
+                image_path = os.path.join(temp_dir, uploaded_file.name)
+                original_image = Image.open(image_path)
+                with col1:
+                    st.markdown("**Original Image**")
+                    st.image(original_image, use_column_width=True)
+                # Process the image
+                try:
+                    image, detections, result_image, processed_regions, translated_image = detector.process_image(image_path)
+                    with col2:
+                        if translated_image is not None:
+                            st.markdown("**Translated Image**")
+                            st.image(translated_image, use_column_width=True)
+                        else:
+                            st.error("No text was detected in this image.")
+                    # Show detected text and translations if available
+                    if processed_regions and processed_regions['text_regions']:
+                        with st.expander("View Detected Text and Translations"):
+                            for i, region in enumerate(processed_regions['text_regions'], 1):
+                                st.markdown(f"**Region {i}**")
+                                cols = st.columns(2)
+                                with cols[0]:
+                                    st.markdown("Original Text:")
+                                    st.code(region['text'])
+                                with cols[1]:
+                                    st.markdown("Translation:")
+                                    st.code(region['translation'])
+                                st.markdown("---")
+                except Exception as e:
+                    st.error(f"Error processing {uploaded_file.name}: {str(e)}")
+                    continue
+                st.markdown("---")
+else:
+    # Show instructions when no files are uploaded
+    st.info("👆 Upload a manga page to get started!")
+# Footer
+st.markdown("---")
+st.markdown("""
+    <div style='text-align: center; color: #666666; padding: 1rem;'>
+        Made with ❤️ for manga fans
+    </div>
+""", unsafe_allow_html=True)
+# Add reset button to sidebar
+if st.sidebar.button("🔄 Reset All"):
+    # Clear session state
+    for key in ['processed_results', 'temp_dir']:
+        if key in st.session_state:
+            del st.session_state[key]
+    st.experimental_rerun()
+# Footer
+st.markdown("---")
+st.markdown("""
+*Created by Ebhon*
+This app translates manga text from Japanese to English using:
+- YOLO for text detection
+- Manga OCR for Japanese text recognition
+- DeepL for translation
+""")

manga_translator/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .translator import process_manga_pages
2	+
3	+ __all__ = ['process_manga_pages']

manga_translator/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (275 Bytes). View file

manga_translator/__pycache__/translator.cpython-312.pyc ADDED Viewed

Binary file (38.5 kB). View file

manga_translator/font/CC Wild Words Bold Italic.ttf ADDED Viewed

Binary file (32.8 kB). View file

manga_translator/font/CC Wild Words Italic.ttf ADDED Viewed

Binary file (32.6 kB). View file

manga_translator/font/CC Wild Words Roman.ttf ADDED Viewed

Binary file (32.3 kB). View file

manga_translator/translator.py ADDED Viewed

	@@ -0,0 +1,876 @@

+from ultralytics import YOLO
+import cv2
+import os
+from PIL import Image, ImageFile, ImageDraw, ImageFont
+import numpy as np
+import logging
+import warnings
+import transformers
+import streamlit as st
+import matplotlib.pyplot as plt
+import re
+from manga_ocr import MangaOcr
+from difflib import SequenceMatcher
+import deepl
+from dotenv import load_dotenv
+import textwrap
+# Configure logging and warnings
+transformers.logging.set_verbosity_error()
+logging.getLogger("transformers").setLevel(logging.ERROR)
+warnings.filterwarnings("ignore")
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Configure image loading
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+# Load environment variables
+load_dotenv()
+# Initialize DeepL translator with error handling
+try:
+    deepl_api_key = os.getenv('DEEPL_API_KEY')
+    if not deepl_api_key:
+        raise ValueError("DeepL API key not found in environment variables")
+    translator_deepl = deepl.Translator(deepl_api_key)
+except Exception as e:
+    logging.error(f"Failed to initialize DeepL translator: {e}")
+    # Fallback to a placeholder translator for testing
+    class PlaceholderTranslator:
+        def translate_text(self, text, source_lang, target_lang):
+            class Result:
+                def __init__(self, text):
+                    self.text = f"[TRANSLATION: {text}]"
+            return Result(text)
+    translator_deepl = PlaceholderTranslator()
+class MangaTextDetector:
+    def __init__(self, model_path="best.pt"):
+        """Initialize the detector with YOLO model"""
+        self.model = YOLO(model_path)
+        self.model.conf = 0.25
+        self.model.iou = 0.45
+        self.mocr = MangaOcr()
+        self.font_path = 'font/CC Wild Words Roman.ttf'
+    def reload_and_save_images(self, folder_path):
+        """
+        Reloads and resaves all images in a folder to ensure they are properly formatted.
+        Args:
+            folder_path (str): Path to the directory containing images
+        """
+        os.makedirs(folder_path, exist_ok=True)
+        for filename in os.listdir(folder_path):
+            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
+                path = os.path.join(folder_path, filename)
+                try:
+                    img = Image.open(path)
+                    img = img.convert("RGB")
+                    img.save(path)
+                except Exception as e:
+                    logging.error(f"Skipping {filename}: {e}")
+    def load_image(self, image_path):
+        """
+        Load an image from path
+        Args:
+            image_path (str): Path to image file
+        Returns:
+            numpy.ndarray: Loaded image in BGR format
+        """
+        image = cv2.imread(image_path)
+        if image is None:
+            raise ValueError(f"Could not load image: {image_path}")
+        return image
+    def detect_text(self, image):
+        """
+        Detect text regions in an image
+        Args:
+            image (numpy.ndarray): Input image
+        Returns:
+            list: List of detections (coordinates, class, confidence)
+        """
+        results = self.model(image)
+        if not results or len(results) == 0:
+            logging.warning("No text regions detected")
+            return []
+        detections = []
+        result = results[0]  # Get first result
+        logging.info(f"Found {len(result.boxes)} potential text regions")
+        for box in result.boxes:
+            try:
+                coords = box.xyxy[0].cpu().numpy()  # Get coordinates
+                cls = int(box.cls[0].item())  # Get class
+                conf = float(box.conf[0].item())  # Get confidence
+                box_coords = [int(c) for c in coords]
+                if conf > self.model.conf:
+                    detections.append((box_coords, cls, conf))
+            except Exception as e:
+                logging.warning(f"Error processing detection: {str(e)}")
+                continue
+        return detections
+    def draw_detections(self, image, detections):
+        """
+        Draw detection boxes on image
+        Args:
+            image (numpy.ndarray): Input image
+            detections (list): List of detections
+        Returns:
+            numpy.ndarray: Image with drawn detections
+        """
+        display_img = image.copy()
+        colors = {
+            0: (0, 255, 0),    # Green for speech bubbles
+            1: (255, 0, 0),    # Blue for narration
+            2: (0, 0, 255),    # Red for other text
+            3: (255, 255, 0),  # Cyan for text
+            4: (0, 255, 255)   # Yellow for UI
+        }
+        class_names = {
+            0: "Speech Bubble",
+            1: "Narration",
+            2: "Other Text",
+            3: "Text",
+            4: "UI Element"
+        }
+        for box_coords, cls, conf in detections:
+            x1, y1, x2, y2 = box_coords
+            color = colors.get(cls, (0, 255, 0))
+            # Draw rectangle
+            cv2.rectangle(display_img, (x1, y1), (x2, y2), color, 2)
+            # Add label
+            class_name = class_names.get(cls, "Unknown")
+            label = f"{class_name}: {conf:.2f}"
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 0.5
+            thickness = 1
+            (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
+            cv2.rectangle(display_img, (x1, y1-text_height-5), (x1+text_width, y1), color, -1)
+            cv2.putText(display_img, label, (x1, y1-5), font, font_scale, (255, 255, 255), thickness)
+        return display_img
+    def sort_bubbles(self, boxes):
+        """
+        Sorts text bubbles in reading order (top-to-bottom, right-to-left).
+        Args:
+            boxes (list or numpy.ndarray): List of bounding boxes in format [x1, y1, x2, y2]
+        Returns:
+            list: Sorted list of bounding boxes in reading order
+        """
+        return sorted(boxes, key=lambda b: (int(b[1] // 50), -int(b[0])))
+    def determine_region_type(self, box, image, class_id):
+        """
+        Determine region type based on the model's class prediction.
+        """
+        class_to_type = {
+            0: "bubble",      # Speech bubbles containing dialogue
+            1: "narration",   # Rectangular narration boxes
+            2: "other",       # Other manga elements
+            3: "text",        # Standalone text elements
+            4: "ui"          # User interface elements
+        }
+        return class_to_type.get(class_id, "unknown")
+    def enhance_text_region(self, image_region):
+        """
+        Enhance text clarity in an image region before OCR.
+        """
+        gray = cv2.cvtColor(image_region, cv2.COLOR_BGR2GRAY) if len(image_region.shape) == 3 else image_region
+        binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                     cv2.THRESH_BINARY, 11, 2)
+        denoised = cv2.fastNlMeansDenoising(binary, None, 10, 7, 21)
+        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
+        enhanced = clahe.apply(gray)
+        return enhanced
+    def validate_ocr_result(self, text, image_region):
+        """
+        Validate OCR results to filter out hallucinations or low-confidence detections.
+        """
+        if not text or len(text.strip()) < 2:
+            return False
+        gray = cv2.cvtColor(image_region, cv2.COLOR_BGR2GRAY) if len(image_region.shape) == 3 else image_region
+        laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
+        if laplacian_var < 50:
+            return False
+        min_val, max_val, _, _ = cv2.minMaxLoc(gray)
+        contrast = max_val - min_val
+        if contrast < 30:
+            return False
+        text_density = np.count_nonzero(gray < 128) / gray.size
+        if text_density < 0.05:
+            return False
+        return True
+    def verify_japanese_text(self, text):
+        """
+        Verify if the detected text is likely valid Japanese.
+        """
+        japanese_chars = re.findall(r'[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]', text)
+        if len(japanese_chars) < len(text) * 0.5:
+            return False
+        for char in set(text):
+            if text.count(char) > len(text) * 0.7:
+                return False
+        return True
+    def clean_ocr_text(self, text):
+        """
+        Clean OCR text by removing non-Japanese/non-English characters and normalizing spaces.
+        Args:
+            text (str): Raw OCR text to clean
+        Returns:
+            str: Cleaned and normalized text
+        """
+        # Remove non-Japanese/non-English characters
+        text = re.sub(r'[^\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\uFF00-\uFFEFa-zA-Z0-9\s.,!?\'\"-]', '', text)
+        # Merge separated kanji words
+        text = re.sub(r'(?<=[\u4E00-\u9FFF]) (?=[\u4E00-\u9FFF])', '', text)
+        # Normalize spaces
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text
+    def split_japanese_sentences(self, text):
+        """
+        Split Japanese text into sentences based on punctuation.
+        Args:
+            text (str): Japanese text to split
+        Returns:
+            str: Text with newlines after each sentence
+        """
+        return re.sub(r'([。！？])', r'\1\n', text).strip()
+    def is_similar(self, a, b, threshold=0.8):
+        """
+        Check if two strings are similar based on sequence matching.
+        Args:
+            a (str): First string to compare
+            b (str): Second string to compare
+            threshold (float): Similarity threshold (0.0 to 1.0)
+        Returns:
+            bool: True if strings are similar enough to be considered duplicates
+        """
+        return SequenceMatcher(None, a, b).ratio() > threshold
+    def manga_style_formatting(self, text):
+        """Apply universal manga-specific formatting rules."""
+        manga_terms = {
+            'sama': '-sama',
+            'san': '-san',
+            'kun': '-kun',
+            'chan': '-chan',
+            'sensei': '-sensei',
+            'senpai': '-senpai',
+            'kouhai': '-kouhai',
+            'dono': '-dono',
+            'shi': '-shi',
+        }
+        sfx_categories = {
+            'ドドド|ゴゴゴ|ドンドン': '*RUMBLE*',
+            'バキッ|バキバキ': '*CRACK*',
+            'ガチャ|カチャ': '*CLICK*',
+            'ザー|ザァ': '*WHOOSH*',
+            'ドン|バン': '*BAM*',
+            'シーン': '*SILENCE*',
+            'キラキラ': '*SPARKLE*',
+            'ニコ': '*SMILE*',
+            'ハァハァ': '*PANT*',
+            'ドキドキ': '*THUMP*'
+        }
+        character_names = {
+            'カイドウ': 'Kaido',
+            'モンキー・ロ・ルフィ': 'Monkey D. Luffy',
+            '海賊王': 'Pirate King'
+        }
+        formatted_text = text
+        for jp, en in character_names.items():
+            formatted_text = formatted_text.replace(jp, en)
+        formatted_text = formatted_text.replace('お前', 'you')
+        formatted_text = formatted_text.replace('おれ', 'I')
+        formatted_text = re.sub(r'(!+)', r'!\1', formatted_text)
+        formatted_text = formatted_text.replace('...', '…')
+        formatted_text = re.sub(r'\?+!+|\!+\?+', '?!', formatted_text)
+        if '!' in formatted_text:
+            formatted_text = formatted_text.upper()
+        return formatted_text
+    def clean_and_translate_text(self, text, context=None):
+        """Clean and translate text with universal manga context."""
+        if text.strip() in ['！', '。', '、', '．．．', '？']:
+            return ""
+        # Remove duplicate punctuation and lines
+        cleaned_text = text.strip()
+        cleaned_text = re.sub(r'([！。、？])\1+', r'\1', cleaned_text)  # Remove duplicate punctuation
+        cleaned_text = re.sub(r'(.+?)(?:\n\1)+', r'\1', cleaned_text)  # Remove duplicate lines
+        try:
+            translation = translator_deepl.translate_text(
+                cleaned_text,
+                source_lang='JA',
+                target_lang='EN-US',
+                preserve_formatting=True
+            ).text
+            translation = self.manga_style_formatting(translation)
+            translation = re.sub(r'\s+([!?.,])', r'\1', translation)  # Fix spacing around punctuation
+            translation = re.sub(r'[\s\n]+', ' ', translation).strip()  # Clean up whitespace
+            # Remove duplicate phrases in translation
+            translation_parts = translation.split()
+            unique_parts = []
+            for part in translation_parts:
+                if not unique_parts or part.upper() != unique_parts[-1].upper():
+                    unique_parts.append(part)
+            translation = ' '.join(unique_parts)
+            logging.info(f"Translated: {cleaned_text} -> {translation}")
+            return translation
+        except Exception as e:
+            logging.error(f"Translation failed for {cleaned_text}: {e}")
+            return ""
+    def post_process_translation(self, translation, text_type=None):
+        """Apply final formatting based on text type."""
+        if text_type is None:
+            if bool(re.search(r'[ドゴバキガ]{2,}', translation)):
+                text_type = "sfx"
+            elif '!' in translation or '?' in translation:
+                text_type = "emphasis"
+        if text_type == "sfx":
+            return f"*{translation.upper()}*"
+        elif text_type == "emphasis":
+            if '!' in translation and '?' in translation:
+                return translation.upper() + "?!"
+            elif '!' in translation:
+                return translation.upper()
+            else:
+                return translation
+        return translation
+    def process_text_regions(self, detections, image):
+        """Process text regions from detections."""
+        text_regions = []
+        bubbles = []
+        for i, (box_coords, cls_id, conf) in enumerate(detections):
+            try:
+                x1, y1, x2, y2 = box_coords
+                region_type = self.determine_region_type(box_coords, image, cls_id)
+                if cls_id == 0:  # Speech bubble
+                    bubbles.append({
+                        'coords': (x1, y1, x2, y2),
+                        'type': region_type,
+                        'confidence': conf
+                    })
+                    continue
+                if cls_id != 3:  # Only process text class
+                    continue
+                cropped = image[y1:y2, x1:x2]
+                original_crop = cropped.copy()
+                enhanced_crop = self.enhance_text_region(cropped)
+                pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
+                raw_text = self.mocr(pil_crop)
+                cleaned_text = self.clean_ocr_text(raw_text)
+                formatted_text = self.split_japanese_sentences(cleaned_text)
+                # Translate the text
+                translated_text = self.clean_and_translate_text(formatted_text)
+                final_translation = self.post_process_translation(translated_text)
+                is_valid = self.validate_ocr_result(cleaned_text, cropped) and self.verify_japanese_text(cleaned_text)
+                if not is_valid:
+                    continue
+                text_regions.append({
+                    'text': formatted_text,
+                    'raw_text': raw_text,
+                    'translation': final_translation,
+                    'coords': (x1, y1, x2, y2),
+                    'type': region_type
+                })
+            except Exception as e:
+                logging.warning(f"Error processing region {i}: {str(e)}")
+                continue
+        # Deduplicate similar text regions
+        unique_regions = []
+        for region in text_regions:
+            is_duplicate = False
+            for existing in unique_regions:
+                if self.is_similar(region['text'], existing['text']):
+                    is_duplicate = True
+                    break
+            if not is_duplicate:
+                unique_regions.append(region)
+        return {
+            'text_regions': unique_regions,
+            'bubbles': bubbles
+        }
+    def insert_translation(self, image, box_coords, translated_text, font_size_multiplier=1.0):
+        """
+        Insert translated text into a text region with improved dynamic font sizing.
+        """
+        x1, y1, x2, y2 = map(int, box_coords)
+        region_width, region_height = x2-x1, y2-y1
+        # Extract the region
+        region = image[y1:y2, x1:x2].copy()
+        # Create a clean white background
+        clean_region = np.ones_like(region) * 255
+        # Create a PIL Image for text rendering
+        pil_region = Image.fromarray(cv2.cvtColor(clean_region, cv2.COLOR_BGR2RGB))
+        draw = ImageDraw.Draw(pil_region)
+        # Dynamic base font size calculation based on region dimensions and text length
+        area = region_width * region_height
+        text_length = len(translated_text)
+        # Calculate initial font size based on area and text length
+        initial_font_size = int(np.sqrt(area / (text_length + 1)) * 1.2)
+        # Adjust based on region shape
+        aspect_ratio = region_width / max(1, region_height)
+        if aspect_ratio > 2:  # Wide region
+            initial_font_size = int(initial_font_size * 0.8)
+        elif aspect_ratio < 0.5:  # Tall region
+            initial_font_size = int(initial_font_size * 0.9)
+        # Apply font size multiplier
+        initial_font_size = int(initial_font_size * font_size_multiplier)
+        # Set minimum and maximum font sizes based on region dimensions
+        min_font_size = max(12, int(min(region_width, region_height) * 0.1))
+        max_font_size = min(72, int(min(region_width, region_height) * 0.4))
+        # Clamp font size between min and max
+        font_size = max(min_font_size, min(initial_font_size, max_font_size))
+        try:
+            font = ImageFont.truetype(self.font_path, font_size)
+        except IOError:
+            logging.warning(f"Font {self.font_path} not found, using default font")
+            font = ImageFont.load_default()
+        # Calculate padding based on region size
+        padding_x = int(region_width * 0.05)
+        padding_y = int(region_height * 0.05)
+        # Calculate effective dimensions for text
+        effective_width = region_width - (2 * padding_x)
+        effective_height = region_height - (2 * padding_y)
+        # Calculate characters per line based on font metrics
+        test_text = "A" * 10  # Use a test string to measure character width
+        test_bbox = draw.textbbox((0, 0), test_text, font=font)
+        avg_char_width = (test_bbox[2] - test_bbox[0]) / 10
+        chars_per_line = max(1, int(effective_width / avg_char_width))
+        def smart_wrap(text, width):
+            """Improved text wrapping with better handling of long words and line breaks."""
+            # First try standard wrapping
+            wrapped = textwrap.fill(text, width=width)
+            lines = wrapped.split('\n')
+            # Check if any line is too long
+            max_line_length = max(len(line) for line in lines)
+            if max_line_length > width * 1.2:
+                # More aggressive wrapping for long lines
+                words = text.split()
+                lines = []
+                current_line = []
+                current_length = 0
+                for word in words:
+                    word_length = len(word)
+                    if current_length + word_length <= width:
+                        current_line.append(word)
+                        current_length += word_length + 1
+                    else:
+                        if word_length > width // 2:
+                            # Break long words with hyphens
+                            parts = [word[i:i+width//2] for i in range(0, len(word), width//2)]
+                            current_line.append(parts[0] + "-")
+                            lines.append(" ".join(current_line))
+                            current_line = parts[1:]
+                            current_length = sum(len(p) for p in current_line) + len(current_line)
+                        else:
+                            lines.append(" ".join(current_line))
+                            current_line = [word]
+                            current_length = word_length + 1
+                if current_line:
+                    lines.append(" ".join(current_line))
+                wrapped = "\n".join(lines)
+            return wrapped
+        # Wrap text with improved algorithm
+        wrapped_text = smart_wrap(translated_text, width=chars_per_line)
+        # Calculate text dimensions
+        text_bbox = draw.textbbox((0, 0), wrapped_text, font=font)
+        text_width = text_bbox[2] - text_bbox[0]
+        text_height = text_bbox[3] - text_bbox[1]
+        # If text is too big, reduce font size iteratively
+        while (text_width > effective_width or text_height > effective_height) and font_size > min_font_size:
+            font_size = int(font_size * 0.9)
+            font = ImageFont.truetype(self.font_path, font_size)
+            # Recalculate chars per line and rewrap text
+            test_bbox = draw.textbbox((0, 0), test_text, font=font)
+            avg_char_width = (test_bbox[2] - test_bbox[0]) / 10
+            chars_per_line = max(1, int(effective_width / avg_char_width))
+            wrapped_text = smart_wrap(translated_text, width=chars_per_line)
+            # Update text dimensions
+            text_bbox = draw.textbbox((0, 0), wrapped_text, font=font)
+            text_width = text_bbox[2] - text_bbox[0]
+            text_height = text_bbox[3] - text_bbox[1]
+        # Center the text
+        text_x = (region_width - text_width) // 2
+        text_y = (region_height - text_height) // 2
+        # Draw text with slight shadow for better readability
+        shadow_offset = max(1, font_size // 20)
+        draw.text((text_x + shadow_offset, text_y + shadow_offset), wrapped_text, font=font, fill=(200, 200, 200))
+        draw.text((text_x, text_y), wrapped_text, font=font, fill=(0, 0, 0))
+        # Convert back to OpenCV format and update the image
+        result_region = cv2.cvtColor(np.array(pil_region), cv2.COLOR_RGB2BGR)
+        image[y1:y2, x1:x2] = result_region
+        return image
+    def check_and_fix_truncated_text(self, image, text_regions):
+        """Enhanced function to fix text issues with improved region analysis and font sizing."""
+        fixed_image = image.copy()
+        # First pass: analyze all text regions and their context
+        regions_to_process = []
+        # Get image dimensions for context
+        img_height, img_width = image.shape[:2]
+        total_image_area = img_width * img_height
+        for region in text_regions:
+            if not region.get('translation', '').strip():
+                continue
+            x1, y1, x2, y2 = region['coords']
+            translation = region['translation']
+            # Calculate region properties
+            region_width = x2 - x1
+            region_height = y2 - y1
+            region_area = region_width * region_height
+            text_length = len(translation)
+            # Calculate relative metrics
+            area_ratio = region_area / total_image_area
+            aspect_ratio = region_width / max(1, region_height)
+            text_density = text_length / max(1, region_area)
+            # Initialize font multiplier based on various factors
+            font_multiplier = 1.0
+            priority = 0
+            # Adjust for region size relative to image
+            if area_ratio < 0.02:  # Very small region
+                font_multiplier *= 1.3
+                priority += 3
+            elif area_ratio < 0.05:  # Small region
+                font_multiplier *= 1.2
+                priority += 2
+            # Adjust for aspect ratio
+            if aspect_ratio > 2.5:  # Very wide region
+                font_multiplier *= 0.85
+                priority += 2
+            elif aspect_ratio > 1.5:  # Wide region
+                font_multiplier *= 0.9
+                priority += 1
+            elif aspect_ratio < 0.4:  # Very tall region
+                font_multiplier *= 0.9
+                priority += 2
+            # Adjust for text density
+            if text_density > 0.1:  # Very dense text
+                font_multiplier *= 0.85
+                priority += 3
+            elif text_density > 0.05:  # Dense text
+                font_multiplier *= 0.9
+                priority += 2
+            # Adjust for text content
+            if any(char in translation for char in '!?'):  # Emphasis text
+                font_multiplier *= 1.1
+                priority += 1
+            if translation.isupper():  # All caps text
+                font_multiplier *= 0.9
+                priority += 1
+            # Position-based adjustments
+            center_y = (y1 + y2) / 2
+            if center_y < img_height * 0.2:  # Top of page
+                font_multiplier *= 0.95
+            elif center_y > img_height * 0.8:  # Bottom of page
+                font_multiplier *= 0.95
+            # Check for overlapping regions
+            overlaps = 0
+            for other in text_regions:
+                if other == region:
+                    continue
+                ox1, oy1, ox2, oy2 = other['coords']
+                if (x1 < ox2 and x2 > ox1 and y1 < oy2 and y2 > oy1):
+                    overlaps += 1
+            if overlaps > 0:
+                font_multiplier *= 0.9
+                priority += overlaps
+            # Store processed region info
+            regions_to_process.append({
+                'region': region,
+                'font_multiplier': max(0.7, min(1.5, font_multiplier)),  # Clamp multiplier
+                'priority': priority,
+                'area': region_area,
+                'text_density': text_density
+            })
+        # Sort regions by priority and area
+        regions_to_process.sort(key=lambda x: (-x['priority'], -x['area'], -x['text_density']))
+        # Second pass: process regions in order
+        for item in regions_to_process:
+            region = item['region']
+            font_multiplier = item['font_multiplier']
+            try:
+                fixed_image = self.insert_translation(
+                    fixed_image,
+                    region['coords'],
+                    region['translation'],
+                    font_size_multiplier=font_multiplier
+                )
+            except Exception as e:
+                logging.warning(f"Failed to process region: {str(e)}")
+                continue
+        return fixed_image
+    def process_image(self, image_path):
+        """Process a single image with text detection, OCR, and translation"""
+        image = self.load_image(image_path)
+        detections = self.detect_text(image)
+        result_image = self.draw_detections(image, detections) if detections else None
+        processed_regions = None
+        if detections:
+            processed_regions = self.process_text_regions(detections, image)
+            if processed_regions['text_regions']:
+                # Create translated image
+                translated_image = image.copy()
+                translated_image = self.check_and_fix_truncated_text(
+                    translated_image,
+                    processed_regions['text_regions']
+                )
+                # Save translated image
+                output_dir = "translated_images"
+                os.makedirs(output_dir, exist_ok=True)
+                output_path = os.path.join(output_dir, f"translated_{os.path.basename(image_path)}")
+                cv2.imwrite(output_path, translated_image)
+                logging.info(f"Saved translated image to: {output_path}")
+                return image, detections, result_image, processed_regions, translated_image
+        return image, detections, result_image, processed_regions, None
+def process_manga_pages(image_folder, translated_dir, show_results=False):
+    """Process manga pages with text detection, OCR, and translation"""
+    os.makedirs(translated_dir, exist_ok=True)
+    detector = MangaTextDetector('best.pt')
+    # Get list of image files
+    image_files = [f for f in os.listdir(image_folder)
+                  if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
+    if not image_files:
+        if show_results and st:
+            st.warning("No image files found in the input folder")
+        return
+    if show_results and st:
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+        total_steps = len(image_files)
+        current_step = 0
+    for filename in image_files:
+        try:
+            if show_results and st:
+                current_step += 1
+                progress = current_step / total_steps
+                progress_bar.progress(progress)
+                status_text.text(f"Processing image {current_step} of {total_steps}: {filename}")
+            input_path = os.path.join(image_folder, filename)
+            if show_results and st:
+                with st.spinner("Processing image..."):
+                    image, detections, result_image, processed_regions, translated_image = detector.process_image(input_path)
+            else:
+                image, detections, result_image, processed_regions, translated_image = detector.process_image(input_path)
+            if show_results and st:
+                st.subheader(f"Results for {filename}")
+                # Show all three images side by side
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    st.image(cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
+                            caption="Original Image")
+                with col2:
+                    if result_image is not None:
+                        st.image(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB),
+                                caption="Detected Regions")
+                with col3:
+                    if translated_image is not None:
+                        st.image(cv2.cvtColor(translated_image, cv2.COLOR_BGR2RGB),
+                                caption="Translated Image")
+                if processed_regions and processed_regions['text_regions']:
+                    st.subheader("Detected Text Regions")
+                    tabs = st.tabs([f"Region {i+1}" for i in range(len(processed_regions['text_regions']))])
+                    for i, (tab, region) in enumerate(zip(tabs, processed_regions['text_regions'])):
+                        with tab:
+                            col1, col2 = st.columns(2)
+                            with col1:
+                                x1, y1, x2, y2 = region['coords']
+                                region_img = image[y1:y2, x1:x2]
+                                st.image(cv2.cvtColor(region_img, cv2.COLOR_BGR2RGB),
+                                       caption="Region Image")
+                            with col2:
+                                st.markdown("**Raw OCR Text:**")
+                                st.code(region['raw_text'])
+                                st.markdown("**Cleaned Text:**")
+                                st.code(region['text'])
+                                if 'translation' in region:
+                                    st.markdown("**English Translation:**")
+                                    st.code(region['translation'])
+        except Exception as e:
+            logging.error(f"Error processing {filename}: {str(e)}")
+            if show_results and st:
+                st.error(f"Error processing {filename}: {str(e)}")
+            continue
+    if show_results and st:
+        progress_bar.empty()
+        status_text.empty()
+        st.success(f"✅ Processing complete! Processed {len(image_files)} images.")
+def main():
+    st.title("Manga Text Detection, OCR, and Translation")
+    st.write("Upload manga pages to detect, recognize, and translate text")
+    uploaded_files = st.file_uploader("Choose manga pages", type=['jpg', 'jpeg', 'png'], accept_multiple_files=True)
+    if uploaded_files:
+        temp_input_dir = "temp_input"
+        temp_output_dir = "temp_output"
+        os.makedirs(temp_input_dir, exist_ok=True)
+        try:
+            for uploaded_file in uploaded_files:
+                with open(os.path.join(temp_input_dir, uploaded_file.name), "wb") as f:
+                    f.write(uploaded_file.getbuffer())
+            process_manga_pages(temp_input_dir, temp_output_dir, show_results=True)
+        except Exception as e:
+            st.error(f"Error: {str(e)}")
+        finally:
+            import shutil
+            if os.path.exists(temp_input_dir):
+                shutil.rmtree(temp_input_dir)
+            if os.path.exists(temp_output_dir):
+                shutil.rmtree(temp_output_dir)
+if __name__ == "__main__":  # start the Streamlit UI only when run as a script, not when imported
+    main()