Spaces:

NaseemTahir
/

Manga_Proj

Sleeping

App Files Files Community

NaseemTahir committed on Mar 3, 2025

Commit

1448da0

verified ·

1 Parent(s): 75adf64

Create app.py

Browse files

Files changed (1) hide show

app.py +285 -0

app.py ADDED Viewed

	@@ -0,0 +1,285 @@

+import streamlit as st
+import os
+import tempfile
+from PIL import Image
+import cv2
+import numpy as np
+import pytesseract
+import re
+import csv
+from PIL import Image, ImageDraw, ImageFont
+from ultralytics import YOLO
+import keras_ocr
+from datetime import datetime
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+from huggingface_hub import hf_hub_download
+# Multilingual sentence-embedding model used to compare OCR text with the
+# entries of the translation table. It is created once at module import;
+# load_translations() and find_best_match() both read this module-level instance.
+similarity_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
+def preprocess_text(text):
+    """Return *text* lower-cased, without punctuation, with whitespace collapsed.
+
+    Every character that is neither a word character nor whitespace is dropped,
+    and each run of whitespace becomes a single space with nothing left at the
+    ends, so two strings can be compared after normalization.
+    """
+    lowered = text.lower()
+    letters_only = re.sub(r'[^\w\s]', '', lowered)  # drop punctuation and symbols
+    tokens = letters_only.split()  # split() without arguments also trims both ends
+    return ' '.join(tokens)
+def load_translations(csv_path):
+    """Load the translation table and precompute one embedding per entry.
+
+    The CSV needs an ``original`` and a ``translated`` column. The file is read
+    as ``utf-8-sig`` so that a leading byte-order mark does not become part of
+    the first header name, and with ``newline=''`` as the ``csv`` module asks
+    for. Rows whose ``original`` cell is missing or blank are skipped because
+    there is nothing to match against.
+
+    Args:
+        csv_path: Path of the translations CSV file.
+
+    Returns:
+        A list of dicts with the keys ``original_raw``, ``original_processed``,
+        ``translated`` and ``embedding`` (the result of
+        ``similarity_model.encode`` for the normalized original text).
+
+    Raises:
+        KeyError: If the file has data rows but lacks a required column.
+    """
+    translations = []
+    with open(csv_path, mode='r', encoding='utf-8-sig', newline='') as file:
+        for row in csv.DictReader(file):
+            # DictReader fills cells that a short row lacks with None.
+            original_raw = (row['original'] or '').strip()
+            translated = (row['translated'] or '').strip()
+            if not original_raw:
+                continue
+            original = preprocess_text(original_raw)
+            # Encode once here so that lookups only have to embed the query.
+            embedding = similarity_model.encode(original, convert_to_tensor=False)
+            translations.append({
+                'original_raw': original_raw,
+                'original_processed': original,
+                'translated': translated,
+                'embedding': embedding
+            })
+    return translations
+def find_best_match(text, translations, threshold=0.6):
+    """Return the translation entry most similar to *text*, or ``None``.
+
+    Similarity is the cosine similarity between the embedding of the normalized
+    *text* and each entry's precomputed ``embedding``.
+
+    Args:
+        text: Raw text to look up (normalized with ``preprocess_text``).
+        translations: Entries shaped like the output of ``load_translations``.
+        threshold: Minimum similarity (0-1 scale) an entry must reach.
+
+    Returns:
+        A shallow copy of the best entry with an added ``score`` key (the
+        similarity as a percentage rounded to one decimal), or ``None`` when
+        the table is empty or no entry has a positive similarity that reaches
+        *threshold*. The entries in *translations* are never modified.
+    """
+    if not translations:
+        return None
+    processed = preprocess_text(text)
+    query_embed = similarity_model.encode(processed, convert_to_tensor=False)
+    # Score all entries with a single call instead of one call per entry.
+    embeddings = np.vstack([entry['embedding'] for entry in translations])
+    scores = cosine_similarity([query_embed], embeddings)[0]
+    best_idx = int(np.argmax(scores))  # ties resolve to the earliest entry
+    best_score = float(scores[best_idx])
+    if best_score <= 0 or best_score < threshold:
+        return None
+    # Copy before annotating: writing 'score' into the shared entry would leak
+    # stale scores into later lookups that reuse the same table.
+    best_match = dict(translations[best_idx])
+    best_match['score'] = round(best_score * 100, 1)  # Convert to percentage
+    return best_match
+# Enhanced Inpainting Functions
+def create_text_mask(region, pipeline):
+    """Build a dilated binary mask covering the text *pipeline* finds in *region*.
+
+    *region* is passed to ``pipeline.recognize`` as a one-image batch. The
+    second element of every prediction is used as a polygon and filled with 255
+    on an otherwise zero ``uint8`` mask of the region's height and width. The
+    mask is then dilated twice with a 5x5 kernel before being returned.
+    """
+    detections = pipeline.recognize([region])[0]
+    text_mask = np.zeros(region.shape[:2], dtype="uint8")
+    for detection in detections:
+        polygon = np.array(detection[1], dtype=np.int32)
+        # Filled one polygon at a time, each as its own contour list.
+        cv2.fillPoly(text_mask, [polygon], 255)
+    kernel = np.ones((5,5), np.uint8)
+    return cv2.dilate(text_mask, kernel, iterations=2)
+def process_bubble_region(region, pipeline):
+    """Return *region* with its detected text inpainted away and then smoothed.
+
+    The text mask from ``create_text_mask`` drives a Telea inpaint with radius
+    25; a median blur with aperture 5 is applied to the inpainted result.
+    """
+    text_mask = create_text_mask(region, pipeline)
+    filled = cv2.inpaint(region, text_mask, 25, cv2.INPAINT_TELEA)
+    smoothed = cv2.medianBlur(filled, 5)
+    return smoothed
+# Text Rendering Functions (Improved Version)
+def _wrap_words(words, font, max_width, draw):
+    """Greedily pack *words* into lines whose rendered width stays below *max_width*."""
+    lines = []
+    current_line = []
+    for word in words:
+        candidate = ' '.join(current_line + [word])
+        bbox = draw.textbbox((0, 0), candidate, font=font)
+        if bbox[2] - bbox[0] < max_width or not current_line:
+            # A word wider than the box still starts its own line; flushing the
+            # empty line in front of it would add a blank row to the layout.
+            current_line.append(word)
+        else:
+            lines.append(' '.join(current_line))
+            current_line = [word]
+    if current_line:
+        lines.append(' '.join(current_line))
+    return lines
+def fit_text_to_box(x, y, w, h, text, font_path, max_size=50, min_size=8, padding_top=3):
+    """Pick the largest font size at which word-wrapped *text* fits a w x h box.
+
+    Sizes are tried from *max_size* down to *min_size*. Lines may use 90% of the
+    box width, and the stacked lines must fit in 90% of the box height.
+
+    Args:
+        x: Left edge of the box (not used by the layout; kept for callers).
+        y: Top edge of the box.
+        w: Box width in pixels.
+        h: Box height in pixels.
+        text: Text to lay out; it is split on whitespace.
+        font_path: Path of a font file loadable by ``ImageFont.truetype``.
+        max_size: Largest font size to try.
+        min_size: Smallest font size to try.
+        padding_top: Pixels added to the returned vertical start position.
+
+    Returns:
+        ``(lines, font, line_height, y_position)``. When a size fits, the text
+        is vertically centred in the box. When none fits, the text is wrapped
+        at *min_size* and starts at ``y + padding_top``. *lines* is empty when
+        *text* contains no words.
+    """
+    words = text.split()
+    max_width = w * 0.9  # Allow 10% padding
+    # Scratch surface used only for measuring; the size-independent work is
+    # done once instead of once per candidate font size.
+    measure = ImageDraw.Draw(Image.new('RGB', (1, 1)))
+    for size in range(max_size, min_size - 1, -1):
+        font = ImageFont.truetype(font_path, size)
+        lines = _wrap_words(words, font, max_width, measure)
+        mg_box = font.getbbox("Mg")
+        line_height = mg_box[3] - mg_box[1]
+        total_height = len(lines) * line_height
+        if total_height <= h * 0.9:  # Allow 10% vertical padding
+            y_position = y + padding_top + (h - total_height) // 2
+            return lines, font, line_height, y_position
+    # Fallback to minimum size: still wrap, and measure the line height the
+    # same way as above so the caller's line spacing stays consistent.
+    font = ImageFont.truetype(font_path, min_size)
+    mg_box = font.getbbox("Mg")
+    lines = _wrap_words(words, font, max_width, measure)
+    return lines, font, mg_box[3] - mg_box[1], y + padding_top
+def refine_ocr_text(text):
+    """Tidy OCR output: blank out control characters and squeeze whitespace.
+
+    Three substitutions run in order, each replacing its match with one space:
+    control characters, runs of two or more whitespace characters, and
+    whitespace at either end. The result is stripped before it is returned.
+    """
+    no_controls = re.sub(r'[\x00-\x1F\x7F-\x9F]', ' ', text)
+    squeezed = re.sub(r'\s{2,}', ' ', no_controls)
+    edges_spaced = re.sub(r'^\s+|\s+$', ' ', squeezed)
+    return edges_spaced.strip()
+# Main Processing Pipeline
+def _clamp_box(box, img_w, img_h):
+    """Return the xyxy *box* as integer pixel bounds clipped to the image size."""
+    x1, y1, x2, y2 = map(int, box)
+    return max(x1, 0), max(y1, 0), min(x2, img_w), min(y2, img_h)
+def _write_report(report_path, processing_log):
+    """Write the per-region log as CSV; an empty log still produces the header row."""
+    fieldnames = ["region", "coordinates", "original", "translated", "score"]
+    with open(report_path, 'w', encoding='utf-8', newline='') as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(processing_log)
+def process_image(input_path, output_path, model_path, font_path, csv_path, match_threshold=0.5, ocr_lang='ita'):
+    """Detect text regions, erase their text and draw the matched translations.
+
+    Steps: load the YOLO detector, the keras-ocr pipeline and the translation
+    table; detect boxes on the image; inpaint every box; OCR every box on the
+    untouched original with Tesseract; look the text up with
+    ``find_best_match``; draw the translation (or the OCR text when nothing
+    matches) into the cleaned box; save the image and a CSV report.
+
+    Args:
+        input_path: Path of the image to process.
+        output_path: Where the rendered image is saved. The report is written
+            next to it, using the same name without extension plus
+            ``_report.csv``.
+        model_path: Path of the YOLO weights.
+        font_path: Path of the font used for rendering.
+        csv_path: Path of the translations CSV.
+        match_threshold: Minimum similarity (0-1 scale) for accepting a match.
+        ocr_lang: Language code passed to Tesseract.
+
+    Returns:
+        ``(pil_img, processing_log)``: the rendered PIL image and a list with
+        one dict per region that produced text.
+
+    Raises:
+        ValueError: If the image at *input_path* cannot be read.
+    """
+    # Initialize components
+    model = YOLO(model_path)
+    pipeline = keras_ocr.pipeline.Pipeline()
+    translations = load_translations(csv_path)
+    processing_log = []
+    # Load original image; cv2.imread reports failure by returning None
+    bgr = cv2.imread(input_path)
+    if bgr is None:
+        raise ValueError(f"Could not read image file: {input_path}")
+    original = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+    working_img = original.copy()
+    img_h, img_w = original.shape[:2]
+    # Detect text regions. Boxes are clamped once and reused by both passes, so
+    # a negative coordinate can never wrap around as a numpy slice index.
+    results = model.predict(original, verbose=False)[0]
+    regions = []
+    for number, box in enumerate(results.boxes.xyxy.cpu().numpy(), start=1):
+        x1, y1, x2, y2 = _clamp_box(box, img_w, img_h)
+        if x2 > x1 and y2 > y1:
+            regions.append((number, x1, y1, x2, y2))
+    # First pass: Clean all text regions
+    for _, x1, y1, x2, y2 in regions:
+        working_img[y1:y2, x1:x2] = process_bubble_region(original[y1:y2, x1:x2], pipeline)
+    # Prepare image for text rendering
+    pil_img = Image.fromarray(working_img)
+    draw = ImageDraw.Draw(pil_img)
+    # Second pass: OCR and text placement
+    for number, x1, y1, x2, y2 in regions:
+        w, h = x2 - x1, y2 - y1
+        # OCR processing on original image
+        bubble_region = original[y1:y2, x1:x2]
+        text = pytesseract.image_to_string(bubble_region, lang=ocr_lang).strip()
+        text = re.sub(r'\s+', ' ', text)
+        if not text:
+            continue
+        print(f"Processing region {number}: Extracted text: {text}")
+        # Find best matching translation
+        best_match = find_best_match(text, translations, match_threshold)
+        if best_match:
+            translated_text = best_match['translated']
+            print(f"Matched (Score: {best_match['score']}): {best_match['original_raw']}")
+        else:
+            translated_text = text  # Fallback to original text
+            print(f"No good match found for: {text}")
+        # Render text, each line centred horizontally in the box
+        lines, font, line_height, y_pos = fit_text_to_box(
+            x1, y1, w, h, translated_text, font_path
+        )
+        for line in lines:
+            bbox = draw.textbbox((x1, y_pos), line, font=font)
+            text_w = bbox[2] - bbox[0]
+            draw.text(
+                (x1 + (w - text_w)//2, y_pos),
+                line,
+                font=font,
+                fill="black"
+            )
+            y_pos += line_height
+        # Log results
+        processing_log.append({
+            "region": number,
+            "coordinates": f"({x1},{y1})-({x2},{y2})",
+            "original": text,
+            "translated": translated_text,
+            "score": best_match['score'] if best_match else 0
+        })
+    # Save outputs
+    pil_img.save(output_path)
+    report_path = os.path.splitext(output_path)[0] + "_report.csv"
+    _write_report(report_path, processing_log)
+    return pil_img, processing_log
+# Streamlit App Configuration
+st.set_page_config(page_title="Comic Translation Pipeline", layout="wide")
+# Sidebar: detector weights and the similarity cut-off
+st.sidebar.header("Configuration")
+yolo_model_path = hf_hub_download(
+    repo_id="NaseemTahir/comic-text-segmenter",
+    filename="comic-text-segmenter.pt",
+)
+# The slider works in percent (0-100, default 75)
+match_threshold = st.sidebar.slider("Translation Match Threshold", 0, 100, 75)
+# Page heading
+st.title("Comic Translation Pipeline")
+st.write("Upload a comic image and translation CSV to get started")
+# One uploader per column: comic image, translations table, font
+col1, col2, col3 = st.columns(3)
+image_file = col1.file_uploader("Upload Comic Image", type=["jpg", "png", "jpeg"])
+csv_file = col2.file_uploader("Upload Translations CSV", type=["csv"])
+font_file = col3.file_uploader("Upload Font File", type=["ttf", "otf"])
+# Processing Pipeline
+# Runs only when the button is pressed and all three uploads are present.
+# NOTE(review): no branch is visible here for a press with a missing upload,
+# so that case appears to do nothing -- confirm against the rest of the file.
+if st.button("Run Full Pipeline") and all([image_file, csv_file, font_file]):
+    # Uploads and outputs live in a temporary directory that is removed when
+    # the with-block exits.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # Save uploaded files under their uploaded names so that
+        # process_image can work with file paths
+        image_path = os.path.join(tmp_dir, image_file.name)
+        with open(image_path, "wb") as f:
+            f.write(image_file.getbuffer())
+        csv_path = os.path.join(tmp_dir, csv_file.name)
+        with open(csv_path, "wb") as f:
+            f.write(csv_file.getbuffer())
+        font_path = os.path.join(tmp_dir, font_file.name)
+        with open(font_path, "wb") as f:
+            f.write(font_file.getbuffer())
+        # Create output directory
+        output_dir = os.path.join(tmp_dir, "output")
+        os.makedirs(output_dir, exist_ok=True)
+        # Run pipeline; any exception is shown in the page via st.error
+        try:
+            with st.spinner("Processing..."):
+                final_output = os.path.join(output_dir, "final_output.png")
+                # The return value is not used; the saved file is read back below.
+                process_image(
+                    input_path=image_path,
+                    output_path=final_output,
+                    model_path=yolo_model_path,
+                    font_path=font_path,
+                    csv_path=csv_path,
+                    # Slider value is a percentage; process_image takes a 0-1 fraction
+                    match_threshold=match_threshold / 100
+                )
+            # Display results
+            st.success("Processing complete!")
+            st.image(Image.open(final_output), caption="Final Result", use_column_width=True)
+            # Download button, fed from the saved PNG
+            with open(final_output, "rb") as f:
+                st.download_button(
+                    label="Download Final Image",
+                    data=f,
+                    file_name="translated_comic.png",
+                    mime="image/png"
+                )
+        except Exception as e:
+            st.error(f"Error processing image: {str(e)}")