import logging
import os
import shutil
import tempfile
import uuid
import zipfile
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache

import gradio as gr
from huggingface_hub import InferenceClient
from lxml import etree
|
|
| |
| |
| |
|
|
# The Hugging Face token is read from the environment; the module refuses to
# load without it.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("Hugging Face token not found. Please set it in the environment.")

# Model used for every request sent through `client`. The variable keeps its
# historical "llama" name even though the id points at a Gemma model.
llama_model_id = "google/gemma-2-27b-it"
client = InferenceClient(model=llama_model_id, token=hf_token)
|
|
| |
| |
| |
|
|
# Display name -> language code for every target language offered in the UI.
languages = {
    "Arabic": "ar",
    "Bulgarian": "bg",
    "Catalan": "ca",
    "Czech": "cs",
    "German": "de",
    "English": "en",
    "Spanish": "es",
    "Estonian": "et",
    "Finnish": "fi",
    "French": "fr",
    "Hungarian": "hu",
    "Italian": "it",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Dutch": "nl",
    "Polish": "pl",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Swedish": "sv",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Vietnamese": "vi",
}

# (name, code) pairs ordered alphabetically by display name.
sorted_languages = sorted(languages.items())
|
|
| |
| |
| |
|
|
def _normalize_generation_output(output):
    """Reduce a text-generation response to a stripped string.

    Accepts a plain string, or a non-empty list whose first item is a dict
    with a ``"generated_text"`` key; anything else is stringified as-is.
    """
    if isinstance(output, str):
        return output.strip()
    if isinstance(output, list) and output:
        first_item = output[0]
        if isinstance(first_item, dict) and "generated_text" in first_item:
            return first_item["generated_text"].strip()
    return str(output).strip()


def single_translate(target_lang, text):
    """Translate one piece of text with the module-level inference client.

    Args:
        target_lang: Language code (a value of ``languages``). Codes that are
            not in ``languages`` fall back to Arabic.
        text: Source text to translate.

    Returns:
        The translated text. Blank input is returned unchanged (``None``
        becomes ``""``) without calling the model. If the request fails or
        the model returns nothing, the source text is returned so callers
        never write an error message into a document in place of content.
    """
    # Nothing to translate: skip the network round trip entirely.
    if text is None or not text.strip():
        return text or ""

    language_names = {code: name for name, code in languages.items()}
    language = language_names.get(target_lang, "Arabic")

    prompt = (
        "You are a professional Translator: don't say a greeting or anything "
        "just jump into the translation: Make sure you don't output anything "
        f"else other than the {language} translation requested for the upcoming "
        f"text:{text} also dont use curly brackets or such just the translation."
    )
    try:
        # NOTE(review): no generation limits are passed, so the client's
        # defaults apply - confirm they are large enough for long text runs.
        translated = _normalize_generation_output(client.text_generation(prompt))
    except Exception:
        # Best effort: keep the original wording rather than failing the
        # whole document, but leave a trace for whoever operates the app.
        logging.getLogger(__name__).warning(
            "Translation to %s failed; keeping the source text.",
            language,
            exc_info=True,
        )
        return text
    return translated or text
|
|
| |
| |
| |
|
|
@lru_cache(maxsize=10000)
def _translate_cached(target_lang, text):
    """Memoized single-text translation, keyed on (language, text).

    Whatever ``single_translate`` returns is cached, including any fallback
    value it produces when a request fails.
    """
    return single_translate(target_lang, text)


def translate_texts(target_lang, texts, max_workers=8):
    """Translate several texts concurrently, preserving input order.

    Args:
        target_lang: Language code forwarded to ``single_translate``.
        texts: Any iterable of hashable texts (tuple, list, generator, ...).
        max_workers: Upper bound on the number of concurrent requests.

    Returns:
        A new list with one translation per input text, in input order.
        The list is never shared with the cache, so callers may mutate it.
    """
    pending = list(texts)
    if not pending:
        return []

    # Translate each distinct text once, then fan the results back out.
    unique = list(dict.fromkeys(pending))
    worker_count = max(1, min(max_workers, len(unique)))
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        results = dict(
            zip(unique, executor.map(lambda item: _translate_cached(target_lang, item), unique))
        )
    return [results[item] for item in pending]


# Kept so code that relied on the lru_cache helpers of translate_texts still works.
translate_texts.cache_info = _translate_cached.cache_info
translate_texts.cache_clear = _translate_cached.cache_clear
|
|
| |
| |
| |
|
|
def get_slide_size_from_presentation(xml_root):
    """Read the slide size from a parsed ``presentation.xml`` root.

    Args:
        xml_root: Root element exposing an lxml-style
            ``xpath(expr, namespaces=...)`` method.

    Returns:
        ``(width, height)`` as integers taken from the ``cx``/``cy``
        attributes of the first ``p:sldSz`` element. When the element or
        either attribute is missing, or a value is not an integer, the
        fallback ``(9144000, 6400800)`` is returned instead.
    """
    fallback = (914400 * 10, 914400 * 7)
    nsmap = {"p": "http://schemas.openxmlformats.org/presentationml/2006/main"}

    sld_sz_elems = xml_root.xpath("//p:presentation/p:sldSz", namespaces=nsmap)
    if not sld_sz_elems:
        return fallback

    sld_sz = sld_sz_elems[0]
    cx_attr = sld_sz.get("cx")
    cy_attr = sld_sz.get("cy")
    if not (cx_attr and cy_attr):
        return fallback

    try:
        return (int(cx_attr), int(cy_attr))
    except ValueError:
        # Non-numeric size attributes: use the fallback rather than fail.
        return fallback
|
|
def mirror_x_around_center(off_x, ext_cx, slide_width):
    """Return the x offset of a shape reflected across the slide's vertical axis.

    Args:
        off_x: Current x offset of the shape's left edge.
        ext_cx: Width of the shape.
        slide_width: Width of the slide.

    Returns:
        The new left-edge offset, truncated to an int.
    """
    half_extent = ext_cx / 2.0
    axis = slide_width / 2.0
    # Signed distance of the shape's centre from the axis; the mirrored
    # centre sits at the same distance on the opposite side.
    distance_from_axis = (off_x + half_extent) - axis
    return int((axis - distance_from_axis) - half_extent)
|
|
def reflect_translate_align_xml(xml_path, slide_width_emu, target_lang, do_mirroring=True, do_text_translation=True, do_right_align=True):
    """Mirror shapes, translate text runs and right-align paragraphs in one XML part.

    The file at ``xml_path`` is parsed, modified in memory and rewritten in
    place only if at least one change was made.

    Args:
        xml_path: Path of the XML part to process.
        slide_width_emu: Slide width used as the mirroring reference.
        target_lang: Language code passed to ``translate_texts``.
        do_mirroring: Reflect the x offset of every ``p:spPr/a:xfrm``.
        do_text_translation: Replace the text of every non-blank ``a:t``.
        do_right_align: Set ``algn="r"`` on every existing ``a:p/a:pPr``;
            paragraphs without an ``a:pPr`` child are left untouched.

    Returns:
        True if the file was modified and rewritten, False otherwise.
    """
    nsmap = {
        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    }
    tree = etree.parse(xml_path)
    root_elem = tree.getroot()
    changed = False

    if do_mirroring:
        # ".//p:spPr/a:xfrm" already matches the spPr of pictures and
        # connectors, so no union with more specific paths is needed.
        for xfrm in root_elem.xpath(".//p:spPr/a:xfrm", namespaces=nsmap):
            off_elem = xfrm.find("a:off", nsmap)
            ext_elem = xfrm.find("a:ext", nsmap)
            if off_elem is None or ext_elem is None:
                continue
            try:
                off_x = int(off_elem.get("x", "0"))
                ext_cx = int(ext_elem.get("cx", "0"))
                new_off_x = mirror_x_around_center(off_x, ext_cx, slide_width_emu)
            except (TypeError, ValueError):
                # Malformed geometry: leave this shape where it is.
                continue
            off_elem.set("x", str(new_off_x))
            changed = True

    if do_text_translation:
        t_elems = root_elem.xpath(".//a:t", namespaces=nsmap)
        stripped_texts = (a_t.text.strip() for a_t in t_elems if a_t.text)
        # dict.fromkeys de-duplicates while keeping a stable, first-seen order.
        unique_texts = list(dict.fromkeys(text for text in stripped_texts if text))
        if unique_texts:
            translations = translate_texts(target_lang, tuple(unique_texts))
            translation_map = dict(zip(unique_texts, translations))
            for a_t in t_elems:
                raw_text = a_t.text or ""
                translated = translation_map.get(raw_text.strip())
                if translated is None:
                    continue
                # Keep the run's own leading/trailing whitespace so adjacent
                # runs do not get glued together after translation.
                leading = raw_text[: len(raw_text) - len(raw_text.lstrip())]
                trailing = raw_text[len(raw_text.rstrip()):]
                a_t.text = f"{leading}{translated}{trailing}"
                changed = True

    if do_right_align:
        for pPr in root_elem.xpath(".//a:p/a:pPr", namespaces=nsmap):
            pPr.set("algn", "r")
            changed = True

    if changed:
        tree.write(xml_path, xml_declaration=True, encoding="UTF-8", standalone=True)
    return changed
|
|
def _collect_slide_xml_files(extract_dir):
    """Return the paths of the slide master, layout and slide XML parts."""
    xml_files = []
    for sub_dir in ("slideMasters", "slideLayouts", "slides"):
        dir_path = os.path.join(extract_dir, "ppt", sub_dir)
        if os.path.isdir(dir_path):
            xml_files.extend(
                os.path.join(dir_path, fname)
                for fname in sorted(os.listdir(dir_path))
                if fname.endswith(".xml")
            )
    return xml_files


def _replace_empty_scheme_colors(ppt_dir):
    """Replace a:schemeClr elements whose val is missing or "none" with black a:srgbClr."""
    drawing_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
    nsmap = {"a": drawing_ns}
    for root_dir, _dirs, files in os.walk(ppt_dir):
        for file in files:
            if not file.endswith(".xml"):
                continue
            xml_path = os.path.join(root_dir, file)
            tree = etree.parse(xml_path)
            replaced = False
            for sc in tree.getroot().xpath(".//a:schemeClr", namespaces=nsmap):
                val_attr = sc.get("val")
                if not val_attr or val_attr.lower() == "none":
                    srgb_elem = etree.Element(f"{{{drawing_ns}}}srgbClr")
                    srgb_elem.set("val", "000000")
                    sc.getparent().replace(sc, srgb_elem)
                    replaced = True
            if replaced:
                tree.write(xml_path, xml_declaration=True, encoding="UTF-8", standalone=True)


def _zip_directory(src_dir, dest_path):
    """Write every file under *src_dir* into a deflated zip at *dest_path*."""
    with zipfile.ZipFile(dest_path, "w", zipfile.ZIP_DEFLATED) as z_out:
        for root_dir, _dirs, files in os.walk(src_dir):
            for file in files:
                full_path = os.path.join(root_dir, file)
                z_out.write(full_path, arcname=os.path.relpath(full_path, src_dir))


def direct_raw_translate_reflect_pptx_with_progress(pptx_path, target_lang, do_mirroring):
    """Process a PPTX by editing its XML parts directly, reporting progress.

    The archive is extracted to a scratch directory, each slide master,
    layout and slide is passed through ``reflect_translate_align_xml``,
    empty scheme colours are replaced, and the result is zipped into a new
    file in the system temp directory. The scratch directory is removed on
    success, on failure, and when the generator is closed early.

    Args:
        pptx_path: Path of the source ``.pptx`` file.
        target_lang: Language code for the translation.
        do_mirroring: Enables both shape mirroring and right alignment.

    Yields:
        ``(output_path, status)`` tuples. ``output_path`` is ``None`` for
        every progress and error message and is the path of the new file
        only in the final success message. Errors are reported as a status
        message rather than raised.
    """
    try:
        temp_dir = tempfile.mkdtemp(prefix="raw_")
        try:
            yield (None, "10% - Extracting PPTX contents...")
            with zipfile.ZipFile(pptx_path, "r") as z_in:
                z_in.extractall(temp_dir)

            presentation_xml = os.path.join(temp_dir, "ppt", "presentation.xml")
            slide_width_emu = 9144000
            if os.path.exists(presentation_xml):
                pres_root = etree.parse(presentation_xml).getroot()
                # Only the width is needed for horizontal mirroring.
                slide_width_emu, _slide_height_emu = get_slide_size_from_presentation(pres_root)
            yield (None, "20% - Retrieved slide size.")

            xml_files = _collect_slide_xml_files(temp_dir)
            total_files = len(xml_files)
            yield (None, f"30% - Found {total_files} XML files to process.")

            def process(path):
                return reflect_translate_align_xml(
                    path, slide_width_emu, target_lang, do_mirroring, do_right_align=do_mirroring
                )

            with ThreadPoolExecutor() as executor:
                for idx, _ in enumerate(executor.map(process, xml_files), start=1):
                    progress_percent = 30 + (50 * idx / total_files)
                    yield (None, f"{int(progress_percent)}% - Processed {idx}/{total_files} XML files.")

            yield (None, "80% - Fixing color schemes...")
            _replace_empty_scheme_colors(os.path.join(temp_dir, "ppt"))

            yield (None, "90% - Re-zipping the PPTX file...")
            translated_path = os.path.join(tempfile.gettempdir(), f"translated_{uuid.uuid4().hex}.pptx")
            _zip_directory(temp_dir, translated_path)
        finally:
            # Runs on every exit path so extracted files never pile up.
            shutil.rmtree(temp_dir, ignore_errors=True)
        yield (translated_path, "✅ Processing complete.")
    except Exception as e:
        yield (None, f"❌ Error: {e}")
|
|
def raw_xml_reflect_and_translate(file, target_lang, do_mirroring):
    """Gradio event handler: validate the upload and stream processing progress.

    Args:
        file: Path of the uploaded PPTX; any falsy value means nothing was
            uploaded.
        target_lang: Language code for the translation.
        do_mirroring: Forwarded to the PPTX processing generator.

    Yields:
        ``(output_path, status)`` tuples as produced by
        ``direct_raw_translate_reflect_pptx_with_progress``, or a single
        error tuple when no file was provided or an exception escapes.
    """
    if not file:
        yield (None, "❌ No file uploaded.")
        return
    try:
        yield from direct_raw_translate_reflect_pptx_with_progress(file, target_lang, do_mirroring)
    except Exception as e:
        yield (None, f"❌ Error: {e}")
|
|
| |
| |
| |
|
|
# Gradio front end: upload a PPTX, pick a target language, optionally mirror
# the layout, then stream status messages while the file is processed.
with gr.Blocks() as demo:
    gr.Markdown("<h1>tarGEM</h1>")
    gr.Markdown("""
<h3>PowerPoint Reflection & Translation Tool</h3>
<p>Enhance your presentations with mirrored layouts, translated text, and aligned bullet points.</p>
<ul>
<li><strong>Mirror</strong> shapes for balanced designs</li>
<li><strong>Translate</strong> text seamlessly</li>
<li><strong>Align</strong> to the right using RTL checkbox</li>
<li><strong>Keep your original</strong> font style, slide background and color schemes</li>
</ul>
""")

    ppt_input = gr.File(label="Upload PPTX File", file_types=[".pptx"], type="filepath")

    # Choices are (display name, language code) pairs; the code is the value
    # handed to the event handler.
    language_dropdown = gr.Dropdown(
        label="Select Target Language",
        choices=list(sorted_languages),
        value="ar",
        type="value",
        interactive=True,
    )

    # The same checkbox drives both shape mirroring and right alignment.
    mirror_checkbox = gr.Checkbox(
        label="Mirror Shapes for RTL",
        value=True,
        interactive=True,
    )

    reflect_button = gr.Button("Process PPTX")

    output_file = gr.File(label="Download Processed PPTX", interactive=False)

    status_box = gr.Textbox(label="Status", interactive=False)

    # The handler is a generator, so each yielded tuple updates both outputs.
    reflect_button.click(
        fn=raw_xml_reflect_and_translate,
        inputs=[ppt_input, language_dropdown, mirror_checkbox],
        outputs=[output_file, status_box],
        queue=True,
    )

    gr.Markdown("""
<p style="text-align: center; margin-top: 20px;">
Thank you for using tarGEM!
</p>
""")

demo.launch(share=True)
|
|