Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from pptx import Presentation | |
| from pptx.util import Pt, Inches | |
| from pptx.shapes.group import GroupShape | |
| from pptx.dml.color import RGBColor | |
| from pptx.enum.text import PP_ALIGN | |
| from pptx.shapes.picture import Picture | |
| import json | |
| import os | |
| from PIL import Image | |
| import io | |
def print_json(item):
    """Serialize ``item`` to a pretty-printed JSON string.

    Despite the name, nothing is printed: the formatted text is returned to
    the caller. Non-ASCII characters are kept as-is instead of being escaped,
    and nested structures are indented by four spaces.

    Args:
        item: Any object that ``json.dumps`` can serialize.

    Returns:
        The JSON text representing ``item``.
    """
    return json.dumps(item, indent=4, ensure_ascii=False)
def _run_font_summary(run):
    """Return the font attributes of ``run`` as a plain dict.

    When ``run`` is ``None`` (the paragraph had no runs) every attribute is
    reported as ``None`` so that all paragraphs share the same schema.
    """
    if run is None:
        return dict.fromkeys(
            ('name', 'bold', 'italic', 'underline', 'color', 'language_id')
        )
    font = run.font
    return {
        'name': font.name,
        'bold': font.bold,
        'italic': font.italic,
        'underline': font.underline,
        # ``.rgb`` may be unavailable (python-pptx raises AttributeError when
        # the run has no explicit RGB colour); report None in that case
        # instead of aborting the whole extraction.
        'color': getattr(font.color, 'rgb', None),
        'language_id': font.language_id,
    }


def transfer_textbox_content_in_group(group_shape):
    """Describe the text boxes inside a group shape as a nested dict.

    Every child of ``group_shape`` gets an entry keyed ``shape_<index>``.
    Children with a text frame are described by their type (``"text"``),
    their ``(left, top)`` location and one ``paragraph_<index>`` entry per
    paragraph holding the text, the alignment and the font attributes of the
    paragraph's first run. Children without a text frame map to an empty
    dict.

    Args:
        group_shape: Object exposing a ``shapes`` iterable (a python-pptx
            group shape).

    Returns:
        dict: The nested description of the group's children.
    """
    group_shape_item = {}
    for shape_index, shape in enumerate(group_shape.shapes):
        shape_item = {}
        if shape.has_text_frame:
            shape_item['type'] = "text"
            shape_item['location'] = (shape.left, shape.top)
            for para_index, paragraph in enumerate(shape.text_frame.paragraphs):
                runs = paragraph.runs
                # Empty paragraphs have no runs; indexing runs[0] blindly
                # would raise IndexError.
                first_run = runs[0] if runs else None
                shape_item[f'paragraph_{para_index}'] = {
                    'text': paragraph.text,
                    'align': paragraph.alignment,
                    'font': _run_font_summary(first_run),
                }
        group_shape_item[f"shape_{shape_index}"] = shape_item
    return group_shape_item
def _first_run_font_item(paragraph):
    """Return the font attributes of the paragraph's first run as a dict.

    All values are ``None`` when the paragraph has no runs.
    """
    runs = paragraph.runs
    if not runs:
        return dict.fromkeys(
            ('name', 'bold', 'italic', 'underline', 'color', 'language_id')
        )
    font = runs[0].font
    return {
        'name': font.name,
        'bold': font.bold,
        'italic': font.italic,
        'underline': font.underline,
        # ``.rgb`` may be unavailable (python-pptx raises AttributeError when
        # the run has no explicit RGB colour); fall back to None.
        'color': getattr(font.color, 'rgb', None),
        'language_id': font.language_id,
    }


def _text_shape_to_item(shape):
    """Build the JSON-ready description of one shape that has a text frame."""
    shape_item = {'type': "text", 'location': (shape.left, shape.top)}
    for r, paragraph in enumerate(shape.text_frame.paragraphs):
        shape_item[f'paragraph_{r}'] = {
            'text': paragraph.text,
            'align': paragraph.alignment,
            'font': _first_run_font_item(paragraph),
        }
    return shape_item


def _length_in_inches(length):
    """Convert a python-pptx length to inches, passing ``None`` through."""
    return None if length is None else length.inches


def transfer_to_structure(pptx_file, images_dir_path):
    """Convert a presentation into a JSON description and extract its pictures.

    Each slide becomes a ``slide_<i>`` entry and each of its shapes a
    ``shape_<j>`` entry. Text shapes record their location and paragraphs,
    group shapes record the description returned by
    ``transfer_textbox_content_in_group``, and pictures record their size,
    dpi and location and are written to ``images_dir_path`` as PNG files.
    Shapes of any other kind map to an empty dict. The presentation itself is
    only read, never modified.

    Args:
        pptx_file: Path or file-like object accepted by ``Presentation``.
        images_dir_path: Existing directory that receives the extracted
            pictures.

    Returns:
        tuple: ``(json_text, image_path_list)`` where ``json_text`` is the
        pretty-printed structure and ``image_path_list`` holds the path of
        every picture written, in document order.
    """
    item = {}
    image_path_list = []
    prs = Presentation(pptx_file)
    for i, slide in enumerate(prs.slides):
        slide_item = {}
        for j, shape in enumerate(slide.shapes):
            shape_item = {}
            if shape.has_text_frame:
                shape_item = _text_shape_to_item(shape)
            elif isinstance(shape, GroupShape):
                shape_item['type'] = "group"
                shape_item['group_content'] = transfer_textbox_content_in_group(shape)
            elif isinstance(shape, Picture):
                # The slide index is part of the file name so that pictures
                # sharing a shape index on different slides do not overwrite
                # one another.
                image_path = os.path.join(
                    images_dir_path, f"slide_{i}_picture_{j}.png"
                )
                shape_item['type'] = "picture"
                shape_item['image_path'] = image_path
                shape_item['size'] = shape.image.size  # width, height
                shape_item['dpi'] = shape.image.dpi  # (horz_dpi, vert_dpi)
                shape_item['location'] = (shape.left, shape.top)
                # shape.left/top are already lengths; wrapping them in
                # Inches() would treat the EMU count as a number of inches.
                shape_item['location_inches'] = (
                    _length_in_inches(shape.left),
                    _length_in_inches(shape.top),
                )
                with Image.open(io.BytesIO(shape.image.blob)) as shape_image:
                    shape_image.save(image_path)
                image_path_list.append(image_path)
            slide_item[f"shape_{j}"] = shape_item
        item[f"slide_{i}"] = slide_item
    return print_json(item), image_path_list
def copy_font_style(original_run, new_run):
    """Copy the font attributes of ``original_run`` onto ``new_run``.

    Name, bold, italic, underline and language id are always copied. The
    colour is copied only when the source run exposes an RGB value; otherwise
    the colour of ``new_run`` is left untouched.

    Args:
        original_run: Run whose ``font`` is read.
        new_run: Run whose ``font`` is updated in place.
    """
    source_font = original_run.font
    target_font = new_run.font

    target_font.name = source_font.name
    target_font.bold = source_font.bold
    target_font.italic = source_font.italic
    target_font.underline = source_font.underline
    target_font.language_id = source_font.language_id

    # ``.rgb`` may be unavailable (python-pptx raises AttributeError when the
    # run has no explicit RGB colour). Reading it unguarded would abort the
    # copy, so only assign a colour that actually exists.
    rgb = getattr(source_font.color, 'rgb', None)
    if rgb is not None:
        target_font.color.rgb = rgb
def process_pptx(pptx_file):
    """Gradio callback: convert an uploaded presentation to JSON and images.

    Args:
        pptx_file: The uploaded file as handed over by the ``gr.File`` input.
            This may be ``None`` when nothing was uploaded, a path string, or
            an object carrying the path in its ``name`` attribute, depending
            on how Gradio is configured.

    Returns:
        tuple: ``(json_output, image_paths)`` as produced by
        ``transfer_to_structure``; ``("{}", [])`` when no file was supplied.
    """
    if pptx_file is None:
        # Nothing uploaded: return empty outputs rather than failing on
        # attribute access.
        return "{}", []

    # Accept both a plain path and a file wrapper exposing ``.name``.
    pptx_path = getattr(pptx_file, "name", pptx_file)

    images_dir_path = "images"
    os.makedirs(images_dir_path, exist_ok=True)

    json_output, image_paths = transfer_to_structure(pptx_path, images_dir_path)
    return json_output, image_paths
# Gradio interface: a single file upload goes in; the JSON text and a gallery
# of the extracted pictures come out.
_pptx_input = gr.File(label="Upload PowerPoint File")
_result_outputs = [
    gr.Textbox(label="JSON Output"),
    gr.Gallery(label="Extracted Images"),
]

iface = gr.Interface(
    title="PowerPoint to JSON Converter",
    description="Upload a PowerPoint file to convert its structure to JSON and display extracted images.",
    fn=process_pptx,
    inputs=_pptx_input,
    outputs=_result_outputs,
)

# Launch the app as soon as this file is executed.
iface.launch()