|
|
import os |
|
|
from subprocess import Popen, PIPE |
|
|
import re |
|
|
|
|
|
def moses_to_file(translated_moses_file: str, source_lang: str, target_lang: str, tikal_folder: str,
                  original_xliff_file_path: str) -> str:
    """
    Push a translated Moses text file back into its xliff file and merge the xliff into a final document.

    The function runs three steps:
      1. Calls tikal.sh with "-lm" so the translated Moses text is leveraged into the xliff file
         (the xliff is overwritten in place through "-to").
      2. Removes every <g id="N">...</g> wrapper from the xliff file, keeping only the inner text.
      3. Calls tikal.sh with "-m" on the xliff file and reads the resulting path from the "Output:" line
         that the command prints on stdout.

    Parameters:
        translated_moses_file: Path to the translated Moses text file
        source_lang: Source language of the document
        target_lang: Target language of the document
        tikal_folder: Folder where tikal.sh is located
        original_xliff_file_path: Path to the xliff file; it is modified in place

    Returns:
        string: Text following "Output:" in the stdout of the merge command

    Raises:
        RuntimeError: If the merge command prints no "Output:" line
    """
    tikal_script = os.path.join(tikal_folder, "tikal.sh")

    tikal_moses_to_xliff_command = [tikal_script, "-lm", original_xliff_file_path, "-sl",
                                    source_lang, "-tl", target_lang, "-from", translated_moses_file, "-totrg",
                                    "-noalttrans", "-to", original_xliff_file_path]
    Popen(tikal_moses_to_xliff_command).wait()

    # Context managers guarantee the rewritten xliff is flushed and closed before the merge
    # subprocess reads it. The encoding is explicit so the round trip does not depend on the locale
    # (assumes the xliff is UTF-8, like the stdout decoded below).
    with open(original_xliff_file_path, encoding="utf-8") as xliff_file:
        text = xliff_file.read()
    result = re.sub(r'<g id="\d+">(.*?)</g>', r'\1', text)
    with open(original_xliff_file_path, "w", encoding="utf-8") as xliff_file:
        xliff_file.write(result)

    tikal_merge_doc_command = [tikal_script, "-m", original_xliff_file_path]
    final_process = Popen(tikal_merge_doc_command, stdout=PIPE, stderr=PIPE)
    # communicate() drains both pipes and waits for the process to exit; no extra wait() is needed.
    stdout, stderr = final_process.communicate()

    output = stdout.decode('utf-8')
    match = re.search(r'(?<=Output:\s)(.*)', output)
    if match is None:
        # Surface the captured stderr instead of failing with an opaque subscript error on None.
        raise RuntimeError(
            "tikal merge did not report an output file for "
            f"{original_xliff_file_path!r}: {stderr.decode('utf-8', errors='replace').strip()}"
        )
    return match[0]
|
|
|
|
|
def file_to_moses(input_file: str, source_lang: str, target_lang: str, tikal_folder: str,
                  original_xliff_file_path: str) -> str:
    """
    Run the two tikal.sh steps that turn a document into a Moses plain text file.

    First tikal.sh is called with "-x" (and "-nocopy") on the input document, then with "-xm" on the
    xliff file path. Both calls are awaited; their exit codes are not inspected.

    Parameters:
        input_file: Path to the document to process
        source_lang: Source language of the document
        target_lang: Target language of the document
        tikal_folder: Folder where tikal.sh is located
        original_xliff_file_path: Path to the xliff file, which is used again later in the workflow
            (presumably the file produced by the "-x" step; confirm against the caller)

    Returns:
        string: The xliff path with ".<source_lang>" appended, i.e. the path to the plain text file
    """
    tikal_script = os.path.join(tikal_folder, "tikal.sh")
    language_flags = ["-sl", source_lang, "-tl", target_lang]

    # Step 1: "-x" on the input document.
    extraction = Popen([tikal_script, "-x", input_file, "-nocopy"] + language_flags)
    extraction.wait()

    # Step 2: "-xm" on the xliff file.
    moses_export = Popen([tikal_script, "-xm", original_xliff_file_path] + language_flags)
    moses_export.wait()

    return original_xliff_file_path + f".{source_lang}"