Instructions to use openpecha/aligner with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use openpecha/aligner with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("openpecha/aligner")
model = AutoModelForSeq2SeqLM.from_pretrained("openpecha/aligner")
```
- Notebooks
- Google Colab
- Kaggle
Update handler.py
Browse files
- handler.py +24 -59
handler.py
CHANGED
|
@@ -18,7 +18,7 @@ from contextlib import contextmanager
|
|
| 18 |
import requests
|
| 19 |
logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
|
| 20 |
# Git clone command
|
| 21 |
-
git_clone_command = "git clone https://github.com/
|
| 22 |
|
| 23 |
# Run the command using subprocess
|
| 24 |
try:
|
|
@@ -26,13 +26,9 @@ try:
|
|
| 26 |
print("Git clone successful!")
|
| 27 |
except subprocess.CalledProcessError as e:
|
| 28 |
print(f"Error while running Git clone command: {e}")
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
-
ALIGNER_SCRIPT_DIR = Path("./tibetan-aligner").resolve()
|
| 32 |
-
ALIGNER_SCRIPT_NAME = "align_tib_en.sh"
|
| 33 |
-
ALIGNER_SCRIPT_PATH = ALIGNER_SCRIPT_DIR / ALIGNER_SCRIPT_NAME
|
| 34 |
-
assert ALIGNER_SCRIPT_PATH.is_file()
|
| 35 |
-
|
| 36 |
import requests
|
| 37 |
|
| 38 |
GITHUB_USERNAME = "pechawa"
|
|
@@ -46,18 +42,8 @@ GITHUB_API_ENDPOINT = f"https://api.github.com/orgs/{GITHUB_ORG}/repos"
|
|
| 46 |
DEBUG = True
|
| 47 |
|
| 48 |
quiet = "-q" if DEBUG else ""
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
st = os.stat(fn)
|
| 52 |
-
os.chmod(fn, st.st_mode | stat.S_IEXEC)
|
| 53 |
-
st = os.stat(fn)
|
| 54 |
-
os.chmod(fn, st.st_mode | stat.S_IXGRP)
|
| 55 |
-
st = os.stat(fn)
|
| 56 |
-
os.chmod(fn, st.st_mode | stat.S_IXOTH)
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
make_dir_executable(ALIGNER_SCRIPT_DIR)
|
| 60 |
-
|
| 61 |
|
| 62 |
def create_github_repo(repo_path: Path, repo_name: str, version: str, realign: bool):
|
| 63 |
logging.info("[INFO] Creating GitHub repo...")
|
|
@@ -114,6 +100,7 @@ def create_github_repo(repo_path: Path, repo_name: str, version: str, realign: b
|
|
| 114 |
subprocess.run(f"git push -u origin {branch_name}".split(), cwd=str(repo_path))
|
| 115 |
|
| 116 |
return f"Branch '{branch_name}' updated in {repo_name}" if realign else response.json()["html_url"]
|
|
|
|
| 117 |
def convert_raw_align_to_tm(align_fn: Path, tm_path: Path):
|
| 118 |
if DEBUG:
|
| 119 |
logging.debug("[INFO] Conerting raw alignment to TM repo...")
|
|
@@ -273,47 +260,27 @@ def download_file(s3_public_url: str, output_fn) -> Path:
|
|
| 273 |
def _run_align_script(bo_fn, en_fn, output_dir):
|
| 274 |
start = time.time()
|
| 275 |
|
| 276 |
-
#
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
except IndexError as e:
|
| 295 |
-
logging.error(f"Error processing file path: {e}")
|
| 296 |
-
return None
|
| 297 |
-
except AttributeError as e:
|
| 298 |
-
logging.error("Failed to find output file path in script output.")
|
| 299 |
-
return None
|
| 300 |
-
|
| 301 |
-
output_fn = Path(output_fn)
|
| 302 |
-
|
| 303 |
-
# Check if the file exists and read its content
|
| 304 |
-
if output_fn.exists() and output_fn.is_file():
|
| 305 |
-
content = output_fn.read_text()
|
| 306 |
-
if content:
|
| 307 |
-
logging.info(f"Length of content in bo.tx.clean{output_fn}: {len(content)}")
|
| 308 |
-
else:
|
| 309 |
-
logging.warning(f"The file {output_fn} is empty.")
|
| 310 |
-
else:
|
| 311 |
-
logging.error(f"The file {output_fn} does not exist.")
|
| 312 |
-
|
| 313 |
end = time.time()
|
| 314 |
total_time = round((end - start) / 60, 2)
|
| 315 |
logging.info(f"Total time taken for Aligning: {total_time} mins")
|
| 316 |
-
|
| 317 |
return output_fn
|
| 318 |
def align(text_pair):
|
| 319 |
|
|
@@ -350,7 +317,6 @@ def align(text_pair):
|
|
| 350 |
except Exception as e:
|
| 351 |
logging.error(f"Error in creating TM repository: {e}")
|
| 352 |
return {"error": f"Error in repository creation: {e}"}
|
| 353 |
-
|
| 354 |
class EndpointHandler():
|
| 355 |
def __init__(self, path=""):
|
| 356 |
self.path = path
|
|
@@ -364,5 +330,4 @@ class EndpointHandler():
|
|
| 364 |
A :obj:`list`:. The list contains the embeddings of the inference inputs
|
| 365 |
"""
|
| 366 |
data = data.pop("inputs",data)
|
| 367 |
-
return align(data)
|
| 368 |
-
|
|
|
|
| 18 |
import requests
|
| 19 |
logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
|
| 20 |
# Git clone command
|
| 21 |
+
git_clone_command = "git clone https://github.com/TenzinGayche/bertalign.git"
|
| 22 |
|
| 23 |
# Run the command using subprocess
|
| 24 |
try:
|
|
|
|
| 26 |
print("Git clone successful!")
|
| 27 |
except subprocess.CalledProcessError as e:
|
| 28 |
print(f"Error while running Git clone command: {e}")
|
| 29 |
+
from bertalign import Bertalign
|
| 30 |
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
import requests
|
| 33 |
|
| 34 |
GITHUB_USERNAME = "pechawa"
|
|
|
|
| 42 |
DEBUG = True
|
| 43 |
|
| 44 |
quiet = "-q" if DEBUG else ""
|
| 45 |
+
import subprocess
|
| 46 |
+
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def create_github_repo(repo_path: Path, repo_name: str, version: str, realign: bool):
|
| 49 |
logging.info("[INFO] Creating GitHub repo...")
|
|
|
|
| 100 |
subprocess.run(f"git push -u origin {branch_name}".split(), cwd=str(repo_path))
|
| 101 |
|
| 102 |
return f"Branch '{branch_name}' updated in {repo_name}" if realign else response.json()["html_url"]
|
| 103 |
+
|
| 104 |
def convert_raw_align_to_tm(align_fn: Path, tm_path: Path):
|
| 105 |
if DEBUG:
|
| 106 |
logging.debug("[INFO] Conerting raw alignment to TM repo...")
|
|
|
|
| 260 |
def _run_align_script(bo_fn, en_fn, output_dir):
|
| 261 |
start = time.time()
|
| 262 |
|
| 263 |
+
# Configure logging
|
| 264 |
+
logging.basicConfig(level=logging.INFO)
|
| 265 |
+
|
| 266 |
+
# Read the text from the files
|
| 267 |
+
bo_text = Path(bo_fn).read_text(encoding='utf-8')
|
| 268 |
+
en_text = Path(en_fn).read_text(encoding='utf-8')
|
| 269 |
+
|
| 270 |
+
# Initialize the aligner and align sentences
|
| 271 |
+
aligner = Bertalign(bo_text, en_text, "bo", "en")
|
| 272 |
+
aligner.align_sents()
|
| 273 |
+
result = aligner.return_tsv()
|
| 274 |
+
|
| 275 |
+
# Prepare the output filename and write the result
|
| 276 |
+
output_fn = Path(output_dir).joinpath('result.txt') # Ensures correct path handling
|
| 277 |
+
with open(output_fn, "w", encoding='utf-8') as f:
|
| 278 |
+
f.write(result)
|
| 279 |
+
|
| 280 |
+
# Calculate and log the time taken
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
end = time.time()
|
| 282 |
total_time = round((end - start) / 60, 2)
|
| 283 |
logging.info(f"Total time taken for Aligning: {total_time} mins")
|
|
|
|
| 284 |
return output_fn
|
| 285 |
def align(text_pair):
|
| 286 |
|
|
|
|
| 317 |
except Exception as e:
|
| 318 |
logging.error(f"Error in creating TM repository: {e}")
|
| 319 |
return {"error": f"Error in repository creation: {e}"}
|
|
|
|
| 320 |
class EndpointHandler():
|
| 321 |
def __init__(self, path=""):
|
| 322 |
self.path = path
|
|
|
|
| 330 |
A :obj:`list`:. The list contains the embeddings of the inference inputs
|
| 331 |
"""
|
| 332 |
data = data.pop("inputs",data)
|
| 333 |
+
return align(data)
|
|
|