| | import gradio as gr |
| | import cv2 |
| | import numpy as np |
| | import easyocr |
| | from PIL import Image, ImageDraw |
| | import os |
| | import tempfile |
| | import torch |
| | from torch.hub import download_url_to_file |
| |
|
| | |
| | try: |
| | from pptx import Presentation |
| | from pptx.util import Pt |
| | from pptx.dml.color import RGBColor |
| | except ImportError: |
| | print("⚠️ python-pptx is not installed. Please run: pip install python-pptx") |
| |
|
| | |
| | |
| | |
class SafeLama:
    """Thin wrapper around the TorchScript "big-lama" inpainting checkpoint.

    On construction the checkpoint is downloaded (once) to
    ``~/.cache/big-lama.pt`` and loaded with ``torch.jit.load`` on CUDA when
    available, otherwise on CPU. Instances are callable:
    ``result = lama(image, mask)``.
    """

    # Spatial sizes fed to the network are padded up to a multiple of this.
    # NOTE(review): reference LaMa wrappers (lama-cleaner / IOPaint) pad to a
    # multiple of 8 -- confirm against the checkpoint in use.
    PAD_MODULO = 8

    def __init__(self):
        """Download the checkpoint if missing and load it onto the device.

        Raises:
            Exception: whatever ``torch.jit.load`` raised; it is logged and
                re-raised unchanged so the caller can decide on a fallback.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"LaMa is running on: {self.device}")

        self.model_url = "https://github.com/sanster/models/releases/download/add_big_lama/big-lama.pt"
        self.model_path = os.path.join(os.path.expanduser("~"), ".cache", "big-lama.pt")

        if not os.path.exists(self.model_path):
            print(f"Downloading LaMa model to {self.model_path}...")
            os.makedirs(os.path.dirname(self.model_path), exist_ok=True)
            download_url_to_file(self.model_url, self.model_path)

        try:
            self.model = torch.jit.load(self.model_path, map_location=self.device)
            self.model.eval()
            self.model.to(self.device)
            print("LaMa model loaded successfully.")
        except Exception as e:
            print(f"Fatal Error loading LaMa model: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def __call__(self, image: Image.Image, mask: Image.Image) -> Image.Image:
        """Inpaint the masked region of ``image``.

        Args:
            image: PIL image; converted to RGB before inference.
            mask: PIL image (L or RGB); 0 = keep, 255 = remove. It is
                binarised at 50% after conversion to single channel.

        Returns:
            An RGB PIL image with the same width and height as ``image``.
        """
        # Force three channels so the (H, W, C) -> (C, H, W) permute below
        # is valid for RGBA / greyscale / palette inputs as well.
        img_np = np.array(image.convert("RGB")).astype(np.float32) / 255.0
        mask_np = np.array(mask.convert("L")).astype(np.float32) / 255.0

        # Pad bottom/right so both spatial sizes are multiples of PAD_MODULO;
        # the padding is cropped off again after inference.
        height, width = img_np.shape[:2]
        pad_h = -height % self.PAD_MODULO
        pad_w = -width % self.PAD_MODULO
        if pad_h or pad_w:
            img_np = np.pad(img_np, ((0, pad_h), (0, pad_w), (0, 0)), mode="symmetric")
            mask_np = np.pad(mask_np, ((0, pad_h), (0, pad_w)), mode="symmetric")

        img_t = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).to(self.device)
        mask_t = torch.from_numpy(mask_np).unsqueeze(0).unsqueeze(0).to(self.device)
        mask_t = (mask_t > 0.5).float()

        with torch.no_grad():
            output = self.model(img_t, mask_t)

        cur_res = output[0].permute(1, 2, 0).detach().cpu().numpy()
        cur_res = np.clip(cur_res * 255, 0, 255).astype(np.uint8)

        # Drop the padding so the result matches the input size exactly.
        cur_res = np.ascontiguousarray(cur_res[:height, :width])
        return Image.fromarray(cur_res)
| |
|
| |
|
| | |
| | |
| | |
class SlideCleanerCore:
    """OCR, mask building, inpainting and PPTX slide assembly for slide images.

    Attributes:
        reader: EasyOCR reader configured for Japanese and English.
        lama_model: ``SafeLama`` instance, or ``None`` when loading it failed.
    """

    def __init__(self):
        """Create the OCR reader and try to load the optional LaMa model."""
        print("Initializing EasyOCR...")
        use_gpu = torch.cuda.is_available()
        self.reader = easyocr.Reader(['ja', 'en'], gpu=use_gpu)

        # LaMa is best-effort: when it cannot be loaded, inpaint_image()
        # silently falls back to OpenCV inpainting.
        self.lama_model = None
        try:
            print("Loading LaMa wrapper...")
            self.lama_model = SafeLama()
        except Exception as e:
            print(f"LaMa load failed: {e}")

    def detect_text_initial(self, image_np):
        """Run OCR and return the list of editable box states.

        Args:
            image_np: image passed straight to ``self.reader.readtext``.

        Returns:
            A list of dicts ``{'bbox': [x1, y1, x2, y2], 'text': str,
            'active': True}``, one per OCR result, where the bbox is the
            axis-aligned rectangle enclosing the detected quadrilateral.
        """
        print("Running OCR detection...")
        results = self.reader.readtext(image_np)
        box_states = []
        for bbox, text, _prob in results:
            # Use every corner so the rectangle is correct regardless of the
            # corner order / rotation of the detected quadrilateral.
            xs = [point[0] for point in bbox]
            ys = [point[1] for point in bbox]
            box_states.append({
                'bbox': [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))],
                'text': text,
                'active': True,
            })
        return box_states

    def draw_preview(self, image_np, box_states, temp_point=None, highlight_idx=None):
        """Render the preview image with box overlays.

        - Active boxes: translucent red (targets for removal).
        - The box at ``highlight_idx`` (waiting to be merged): cyan.
        - ``temp_point`` (start point of a manual box): yellow dot.

        Inactive boxes are not drawn.

        Returns:
            An RGB PIL image.
        """
        pil_img = Image.fromarray(image_np).convert("RGBA")
        overlay = Image.new("RGBA", pil_img.size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(overlay)

        for i, item in enumerate(box_states):
            if not item['active']:
                continue
            x1, y1, x2, y2 = item['bbox']

            fill_color = (255, 0, 0, 100)
            outline_color = "red"
            if highlight_idx is not None and i == highlight_idx:
                fill_color = (0, 255, 255, 150)
                outline_color = "cyan"

            draw.rectangle([x1, y1, x2, y2], fill=fill_color, outline=outline_color, width=2)

        if temp_point:
            tx, ty = temp_point
            r = 5
            draw.ellipse((tx - r, ty - r, tx + r, ty + r), fill="yellow", outline="black")

        return Image.alpha_composite(pil_img, overlay).convert("RGB")

    def create_mask_from_states(self, image_shape, box_states, dilation=10):
        """Build the inpainting mask from the current box states.

        Args:
            image_shape: shape tuple whose first two entries are (height, width).
            box_states: box dicts; only ``active`` ones are painted.
            dilation: side length of the square dilation kernel in pixels;
                values <= 0 disable dilation. Coerced to ``int`` because UI
                sliders may deliver floats, which ``np.ones`` rejects.

        Returns:
            ``uint8`` array of shape (height, width): 255 inside the (dilated)
            active boxes, 0 elsewhere.
        """
        h, w = image_shape[:2]
        mask = np.zeros((h, w), dtype=np.uint8)

        for item in box_states:
            if item['active']:
                x1, y1, x2, y2 = item['bbox']
                cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)

        dilation = int(dilation)
        if dilation > 0:
            kernel = np.ones((dilation, dilation), np.uint8)
            mask = cv2.dilate(mask, kernel, iterations=1)

        return mask

    def inpaint_image(self, image_np, mask_np, method):
        """Erase the masked text from the image.

        Uses LaMa when ``method == "LaMa"`` and the model was loaded;
        otherwise falls back to OpenCV Telea inpainting (radius 3).

        Returns:
            The inpainted image as a NumPy array.
        """
        if method == "LaMa" and self.lama_model is not None:
            pil_img = Image.fromarray(image_np)
            pil_mask = Image.fromarray(mask_np)
            return np.array(self.lama_model(pil_img, pil_mask))

        return cv2.inpaint(image_np, mask_np, 3, cv2.INPAINT_TELEA)

    def add_slide_to_prs(self, prs, original_img_np, clean_img_np, box_states):
        """Append one slide to ``prs``: cleaned background plus text boxes.

        The cleaned image is stretched over the whole slide and every active
        box becomes a text box placed at the same relative position, with a
        font size derived from the box height (minimum 8 pt).

        Errors are reported on stdout instead of being raised so that a single
        bad slide or text box does not abort a whole batch.

        Args:
            prs: python-pptx ``Presentation`` to append to.
            original_img_np: source image; only its height/width are used.
            clean_img_np: inpainted image used as the slide background.
            box_states: box dicts (``bbox``, ``text``, ``active``).
        """
        tmp_bg_path = None
        try:
            # Layout index 6 -- presumably the blank layout of the default
            # python-pptx template; verify if a custom template is used.
            slide = prs.slides.add_slide(prs.slide_layouts[6])

            img_h, img_w = original_img_np.shape[:2]
            img_h, img_w = int(img_h), int(img_w)

            # Reserve a temp path, then close the handle before writing and
            # reading it so the file is not opened twice at the same time.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_bg:
                tmp_bg_path = tmp_bg.name
            Image.fromarray(clean_img_np).save(tmp_bg_path)
            slide.shapes.add_picture(tmp_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)

            slide_width_emu = prs.slide_width
            slide_height_emu = prs.slide_height

            for item in box_states:
                if not item['active']:
                    continue
                try:
                    bbox = item['bbox']
                    text = item['text']

                    x1, y1, x2, y2 = [int(v) for v in bbox]

                    # Degenerate boxes cannot host a text frame.
                    if x2 <= x1 or y2 <= y1:
                        continue

                    # Pixel coordinates -> fractions of the image -> EMU.
                    rel_x = x1 / img_w
                    rel_y = y1 / img_h
                    rel_w = (x2 - x1) / img_w
                    rel_h = (y2 - y1) / img_h

                    left = int(slide_width_emu * rel_x)
                    top = int(slide_height_emu * rel_y)
                    width = int(slide_width_emu * rel_w)
                    height = int(slide_height_emu * rel_h)

                    textbox = slide.shapes.add_textbox(left, top, width, height)
                    tf = textbox.text_frame
                    tf.word_wrap = True

                    p = tf.paragraphs[0]
                    p.text = str(text)

                    # 70% of the box height, converted from EMU to points
                    # (12700 EMU per point), never smaller than 8 pt.
                    font_size_emu = slide_height_emu * rel_h * 0.7
                    p.font.size = Pt(max(8, font_size_emu / 12700))

                    p.font.color.rgb = RGBColor(0, 0, 0)
                    p.font.name = "Meiryo"

                except Exception as e_box:
                    print(f"Skipping text box due to error: {e_box}")
                    continue

        except Exception as e_slide:
            print(f"Error adding slide: {e_slide}")
        finally:
            # Always remove the temp background, including on failure.
            if tmp_bg_path and os.path.exists(tmp_bg_path):
                os.remove(tmp_bg_path)
| |
|
| |
|
| | |
# Module-wide processing core shared by every handler below. Constructing it
# initializes EasyOCR and attempts to load the LaMa model, so this runs once
# when the module is loaded.
core = SlideCleanerCore()
| |
|
| |
|
| | |
| | |
| | |
| |
|
def on_files_upload(files):
    """Initialize the batch when files are uploaded.

    Builds one batch entry per uploaded file, runs OCR on the first image only
    (the others are processed lazily when they are first shown) and renders
    its preview.

    Args:
        files: uploaded files; each item is either an object exposing the
            path as ``.name`` or a plain path string.

    Returns:
        ``(preview, batch_data, index, boxes, message)``. On an empty upload
        or a load failure: ``(None, [], 0, [], <message>)``.
    """
    if not files:
        return None, [], 0, [], "No files selected."

    # Depending on the Gradio version / ``type`` setting the File component
    # yields tempfile-like objects (with ``.name``) or plain path strings.
    file_paths = [getattr(f, "name", f) for f in files]

    # 'boxes' stays None until the image has been OCR'd.
    batch_data = [{'path': p, 'boxes': None} for p in file_paths]

    try:
        first_img = Image.open(file_paths[0]).convert("RGB")
        first_np = np.array(first_img)

        first_boxes = core.detect_text_initial(first_np)
        batch_data[0]['boxes'] = first_boxes

        preview = core.draw_preview(first_np, first_boxes)
        msg = f"Loaded {len(files)} images. Showing 1/{len(files)}."
    except Exception as e:
        return None, [], 0, [], f"Error loading image: {e}"

    return preview, batch_data, 0, first_boxes, msg
| |
|
def load_image_at_index(batch_data, index):
    """Load the image at ``index``, OCR it if needed, and build its preview.

    OCR results are cached in ``batch_data[index]['boxes']`` so each image is
    only analysed the first time it is shown.

    Returns:
        ``(preview, boxes, image_array)``, or ``(None, None, None)`` when the
        batch is empty, the index is out of range, or loading fails.
    """
    nothing = (None, None, None)
    if not batch_data or not (0 <= index < len(batch_data)):
        return nothing

    entry = batch_data[index]
    source_path = entry['path']
    try:
        pixels = np.array(Image.open(source_path).convert("RGB"))

        # Run OCR only when this entry has never been analysed.
        cached_boxes = entry['boxes']
        if cached_boxes is None:
            cached_boxes = core.detect_text_initial(pixels)
            entry['boxes'] = cached_boxes

        return core.draw_preview(pixels, cached_boxes), cached_boxes, pixels
    except Exception as err:
        print(f"Load error: {err}")
        return nothing
| |
|
def navigate(direction, batch_data, current_index, current_boxes_state):
    """Handle the Prev/Next buttons.

    Stores the boxes being edited back into the batch, moves ``direction``
    steps (clamped to the valid range) and loads the image at the new index.

    Returns:
        ``(preview, batch_data, new_index, new_boxes, status_message)``.
    """
    if not batch_data:
        return None, batch_data, 0, [], "No Data"

    # Persist the edits made on the image we are leaving.
    batch_data[current_index]['boxes'] = current_boxes_state

    total = len(batch_data)
    target = min(max(current_index + direction, 0), total - 1)

    preview, target_boxes, _image = load_image_at_index(batch_data, target)
    status = f"Image {target+1}/{total}"
    return preview, batch_data, target, target_boxes, status
| |
|
def _find_box_at(boxes, x, y):
    """Return the index of the first box whose bbox contains (x, y), or -1."""
    for i, item in enumerate(boxes):
        x1, y1, x2, y2 = item['bbox']
        if x1 <= x <= x2 and y1 <= y <= y2:
            return i
    return -1


def _merge_boxes(box_a, box_b):
    """Return a new active box spanning both boxes, upper box's text first."""
    ax1, ay1, ax2, ay2 = box_a['bbox']
    bx1, by1, bx2, by2 = box_b['bbox']

    # Text order follows vertical position: the box that starts higher leads.
    if ay1 < by1:
        new_text = str(box_a['text']) + " " + str(box_b['text'])
    else:
        new_text = str(box_b['text']) + " " + str(box_a['text'])

    return {
        'bbox': [min(ax1, bx1), min(ay1, by1), max(ax2, bx2), max(ay2, by2)],
        'text': new_text,
        'active': True,
    }


def canvas_click(batch_data, current_index, current_boxes_state, drawing_point, merge_src_idx, mode, evt: gr.SelectData):
    """Dispatch a click on the preview image according to the edit mode.

    Modes are matched by the prefix of ``mode``:
      - "Toggle...": flip the ``active`` flag of the clicked box.
      - "Merge...": the first click selects a box, the second click on a
        different box merges the two; clicking the same box or empty space
        cancels the selection.
      - anything else (manual add): the first click sets the start corner,
        the second click adds a box if it is larger than 5 px on both sides.

    Args:
        batch_data: list of ``{'path', 'boxes'}`` entries.
        current_index: index of the image being edited.
        current_boxes_state: box dicts of the current image (mutated in place).
        drawing_point: pending start corner of a manual box, or ``None``.
        merge_src_idx: index of the box selected for merging, or ``None``.
        mode: label of the selected edit mode.
        evt: Gradio select event; ``evt.index`` is unpacked as ``(x, y)``.

    Returns:
        ``(preview, batch_data, boxes, drawing_point, merge_src_idx, message)``
        -- always six values, one per output wired to the select event.
    """
    if not batch_data:
        # Same arity as the normal return; a shorter tuple would not match
        # the six outputs bound to this handler.
        return None, batch_data, current_boxes_state, drawing_point, merge_src_idx, "No Data"

    if current_boxes_state is None:
        current_boxes_state = []

    # The merge selection is not cleared when the shown image changes, so a
    # leftover index may not address a box of the current image.
    if merge_src_idx is not None and not (0 <= merge_src_idx < len(current_boxes_state)):
        merge_src_idx = None

    path = batch_data[current_index]['path']
    img_np = np.array(Image.open(path).convert("RGB"))
    click_x, click_y = evt.index
    msg = ""

    if mode.startswith("Toggle"):
        hit_idx = _find_box_at(current_boxes_state, click_x, click_y)
        if hit_idx != -1:
            hit = current_boxes_state[hit_idx]
            hit['active'] = not hit['active']
            msg = "Toggled box state."
        else:
            msg = "No box at clicked position."

        drawing_point = None
        merge_src_idx = None

    elif mode.startswith("Merge"):
        clicked_idx = _find_box_at(current_boxes_state, click_x, click_y)

        if clicked_idx == -1:
            # Clicking empty space cancels a pending selection.
            merge_src_idx = None
            msg = "Canceled."
        elif merge_src_idx is None:
            merge_src_idx = clicked_idx
            msg = "Select 2nd box to merge."
        elif merge_src_idx == clicked_idx:
            merge_src_idx = None
            msg = "Merge canceled (Same box)."
        else:
            new_box = _merge_boxes(
                current_boxes_state[merge_src_idx],
                current_boxes_state[clicked_idx],
            )

            # Pop the higher index first so the lower one stays valid.
            for idx in sorted((merge_src_idx, clicked_idx), reverse=True):
                current_boxes_state.pop(idx)
            current_boxes_state.append(new_box)

            merge_src_idx = None
            msg = "Merged successfully!"

        drawing_point = None

    else:
        merge_src_idx = None
        if drawing_point is None:
            drawing_point = (click_x, click_y)
            msg = "Start point set. Click End point."
        else:
            sx, sy = drawing_point
            x1, x2 = int(min(sx, click_x)), int(max(sx, click_x))
            y1, y2 = int(min(sy, click_y)), int(max(sy, click_y))

            # Ignore accidental double clicks / tiny rectangles.
            if (x2 - x1) > 5 and (y2 - y1) > 5:
                current_boxes_state.append({'bbox': [x1, y1, x2, y2], 'text': "", 'active': True})
                msg = "Manual Box added."
            else:
                msg = "Box too small."

            drawing_point = None

    batch_data[current_index]['boxes'] = current_boxes_state

    preview = core.draw_preview(img_np, current_boxes_state, temp_point=drawing_point, highlight_idx=merge_src_idx)

    return preview, batch_data, current_boxes_state, drawing_point, merge_src_idx, msg
| |
|
| |
|
def generate_final_pptx_batch(batch_data, dilation, method, progress=gr.Progress()):
    """Build one PPTX containing a cleaned, editable slide per batch image.

    The slide size is taken from the first image (1 pixel = 1 point). For
    each image that still exists on disk: reuse the stored boxes (or run OCR
    when none were stored), erase the text via inpainting, and append a slide
    with the cleaned background and text boxes.

    Args:
        batch_data: list of ``{'path', 'boxes'}`` entries.
        dilation: mask expansion in pixels (coerced to ``int``).
        method: inpainting method label forwarded to ``core.inpaint_image``.
        progress: Gradio progress tracker used to wrap the loop.

    Returns:
        ``(status_message, output_path)``; ``output_path`` is ``None`` when
        there is no data or generation failed.
    """
    if not batch_data:
        return "No data", None

    try:
        prs = Presentation()

        first_path = batch_data[0]['path']
        if not os.path.exists(first_path):
            return "Error: Image file not found.", None

        # Only the dimensions are needed; the context manager releases the
        # file handle right away.
        with Image.open(first_path) as img0:
            first_w, first_h = img0.size
        prs.slide_width = Pt(first_w)
        prs.slide_height = Pt(first_h)

        slide_count = 0
        for data in progress.tqdm(batch_data, desc="Generating Slides"):
            if not os.path.exists(data['path']):
                continue

            with Image.open(data['path']) as src:
                img_np = np.array(src.convert("RGB"))

            # Images never opened in the editor have no boxes yet.
            boxes = data['boxes']
            if boxes is None:
                boxes = core.detect_text_initial(img_np)

            # Sliders may deliver floats; the mask kernel size must be an int.
            mask = core.create_mask_from_states(img_np.shape, boxes, int(dilation))
            clean_img = core.inpaint_image(img_np, mask, method)

            core.add_slide_to_prs(prs, img_np, clean_img, boxes)
            slide_count += 1

        out_path = "final_presentation.pptx"
        prs.save(out_path)
        # Report the images actually processed, not the ones skipped above.
        return f"Completed! {slide_count} slides merged.", out_path

    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error during PPTX generation: {e}", None
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
# Dark colour overrides layered on top of the stock "Soft" theme.
_DARK_OVERRIDES = {
    "body_background_fill": "#0b0f19",
    "block_background_fill": "#111827",
    "block_border_color": "#374151",
    "input_background_fill": "#1f2937",
    "button_primary_background_fill": "#4f46e5",
    "body_text_color": "#f3f4f6",
    "block_label_text_color": "#f3f4f6",
    "block_title_text_color": "#f3f4f6",
}

_base_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="slate",
    neutral_hue="slate",
    font=["Meiryo", "sans-serif"],
)
theme = _base_theme.set(**_DARK_OVERRIDES)

# Extra page CSS: hide the footer and let the app fill the viewport height.
css = """
footer {visibility: hidden}
.gradio-container {min-height: 100vh;}
"""
| |
|
# UI definition and event wiring.
# NOTE(review): the original indentation of this section was lost; the
# component nesting below (everything from the status box to the download
# widget inside the right-hand column) is reconstructed -- confirm the layout.
with gr.Blocks(theme=theme, css=css, title="Slide Fixer Pro Max") as demo:
    gr.Markdown("# 🚀 Slide Fixer Pro Max")
    gr.Markdown("画像をアップロード -> 確認・編集 -> **文字が消えた編集可能なPPTX** を生成!")

    # Session state shared between the handlers.
    batch_data_state = gr.State([])        # list of {'path', 'boxes'}
    current_idx_state = gr.State(0)        # index of the image being shown
    current_boxes_state = gr.State([])     # boxes of the image being shown
    draw_point_state = gr.State(None)      # pending start corner (manual add)
    merge_src_idx_state = gr.State(None)   # first box picked in merge mode

    with gr.Row():
        files_input = gr.File(file_count="multiple", label="1. Upload Images (Multiple Support)")

    with gr.Row():
        # Left: clickable preview.
        with gr.Column(scale=2):
            canvas = gr.Image(label="Preview & Click Editor", interactive=False)

        # Right: status, navigation, edit mode and generation controls.
        with gr.Column(scale=1):
            info_text = gr.Textbox(label="Status", value="Waiting for upload...")

            with gr.Row():
                prev_btn = gr.Button("<< Prev Image")
                next_btn = gr.Button("Next Image >>")

            gr.Markdown("### 🛠️ Edit Mode")
            edit_mode = gr.Radio(
                [
                    "Toggle (Click to Remove/Keep)",
                    "Merge Boxes (Click 2 Boxes)",
                    "Add Manual Box (Click Start/End)"
                ],
                value="Toggle (Click to Remove/Keep)",
                label="Mode Select"
            )
            gr.Markdown("""
            - **Toggle**: 赤枠をクリックで ON/OFF 切り替え。
            - **Merge**: 離れた枠を2つクリックして合体。
            - **Add**: 認識されなかった文字を手動で囲む。
            """)

            gr.Markdown("---")

            dilation_sld = gr.Slider(0, 30, 10, label="Dilation (Mask Expansion)")
            method_radio = gr.Radio(["LaMa", "OpenCV"], value="LaMa", label="Inpaint Method")

            gen_btn = gr.Button("✨ Generate Merged PPTX (All Black Text)", variant="primary", size="lg")
            dl_file = gr.File(label="Download Result")

    # Whenever a different image is shown, a half-finished manual box or merge
    # selection belongs to the previous image and must be dropped; otherwise
    # the stale merge index could address a wrong (or non-existent) box.
    def _upload_and_reset(files):
        return (*on_files_upload(files), None, None)

    def _prev_and_reset(batch, index, boxes):
        return (*navigate(-1, batch, index, boxes), None, None)

    def _next_and_reset(batch, index, boxes):
        return (*navigate(1, batch, index, boxes), None, None)

    page_outputs = [
        canvas,
        batch_data_state,
        current_idx_state,
        current_boxes_state,
        info_text,
        draw_point_state,
        merge_src_idx_state,
    ]
    nav_inputs = [batch_data_state, current_idx_state, current_boxes_state]

    # 1. Upload
    files_input.upload(
        fn=_upload_and_reset,
        inputs=[files_input],
        outputs=page_outputs
    )

    # 2. Previous / next image
    prev_btn.click(
        fn=_prev_and_reset,
        inputs=nav_inputs,
        outputs=page_outputs
    )

    next_btn.click(
        fn=_next_and_reset,
        inputs=nav_inputs,
        outputs=page_outputs
    )

    # 3. Click on the preview (toggle / merge / manual add)
    canvas.select(
        fn=canvas_click,
        inputs=[
            batch_data_state,
            current_idx_state,
            current_boxes_state,
            draw_point_state,
            merge_src_idx_state,
            edit_mode
        ],
        outputs=[
            canvas,
            batch_data_state,
            current_boxes_state,
            draw_point_state,
            merge_src_idx_state,
            info_text
        ]
    )

    # 4. Generate the merged PPTX
    gen_btn.click(
        fn=generate_final_pptx_batch,
        inputs=[batch_data_state, dilation_sld, method_radio],
        outputs=[info_text, dl_file]
    )
| |
|
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # Enable the request queue, then start the server with share=False.
    demo.queue().launch(share=False)