TenzinGayche committed on
Commit
690bd1a
·
verified ·
1 Parent(s): 872e749

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +24 -59
handler.py CHANGED
@@ -18,7 +18,7 @@ from contextlib import contextmanager
18
  import requests
19
  logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
20
  # Git clone command
21
- git_clone_command = "git clone https://github.com/OpenPecha/tibetan-aligner"
22
 
23
  # Run the command using subprocess
24
  try:
@@ -26,13 +26,9 @@ try:
26
  print("Git clone successful!")
27
  except subprocess.CalledProcessError as e:
28
  print(f"Error while running Git clone command: {e}")
 
29
 
30
 
31
- ALIGNER_SCRIPT_DIR = Path("./tibetan-aligner").resolve()
32
- ALIGNER_SCRIPT_NAME = "align_tib_en.sh"
33
- ALIGNER_SCRIPT_PATH = ALIGNER_SCRIPT_DIR / ALIGNER_SCRIPT_NAME
34
- assert ALIGNER_SCRIPT_PATH.is_file()
35
-
36
  import requests
37
 
38
  GITHUB_USERNAME = "pechawa"
@@ -46,18 +42,8 @@ GITHUB_API_ENDPOINT = f"https://api.github.com/orgs/{GITHUB_ORG}/repos"
46
  DEBUG = True
47
 
48
  quiet = "-q" if DEBUG else ""
49
- def make_dir_executable(dir_path: Path):
50
- for fn in dir_path.iterdir():
51
- st = os.stat(fn)
52
- os.chmod(fn, st.st_mode | stat.S_IEXEC)
53
- st = os.stat(fn)
54
- os.chmod(fn, st.st_mode | stat.S_IXGRP)
55
- st = os.stat(fn)
56
- os.chmod(fn, st.st_mode | stat.S_IXOTH)
57
-
58
-
59
- make_dir_executable(ALIGNER_SCRIPT_DIR)
60
-
61
 
62
  def create_github_repo(repo_path: Path, repo_name: str, version: str, realign: bool):
63
  logging.info("[INFO] Creating GitHub repo...")
@@ -114,6 +100,7 @@ def create_github_repo(repo_path: Path, repo_name: str, version: str, realign: b
114
  subprocess.run(f"git push -u origin {branch_name}".split(), cwd=str(repo_path))
115
 
116
  return f"Branch '{branch_name}' updated in {repo_name}" if realign else response.json()["html_url"]
 
117
  def convert_raw_align_to_tm(align_fn: Path, tm_path: Path):
118
  if DEBUG:
119
  logging.debug("[INFO] Conerting raw alignment to TM repo...")
@@ -273,47 +260,27 @@ def download_file(s3_public_url: str, output_fn) -> Path:
273
  def _run_align_script(bo_fn, en_fn, output_dir):
274
  start = time.time()
275
 
276
- # Execute the alignment script
277
- cmd = [str(ALIGNER_SCRIPT_PATH), str(bo_fn), str(en_fn), str(output_dir)]
278
- try:
279
- output = subprocess.run(
280
- cmd,
281
- check=True,
282
- capture_output=True,
283
- text=True,
284
- cwd=str(ALIGNER_SCRIPT_DIR),
285
- )
286
- except subprocess.CalledProcessError as e:
287
- logging.error(f"Alignment script failed with error: {e}")
288
- return None
289
-
290
- # Extract the output file path
291
- try:
292
- output_fn = re.search(r"\[OUTPUT\] (.*)", output.stdout).group(1)
293
- output_fn = "/" + output_fn.split("//")[-1] # Correcting the duplicated path
294
- except IndexError as e:
295
- logging.error(f"Error processing file path: {e}")
296
- return None
297
- except AttributeError as e:
298
- logging.error("Failed to find output file path in script output.")
299
- return None
300
-
301
- output_fn = Path(output_fn)
302
-
303
- # Check if the file exists and read its content
304
- if output_fn.exists() and output_fn.is_file():
305
- content = output_fn.read_text()
306
- if content:
307
- logging.info(f"Length of content in bo.tx.clean{output_fn}: {len(content)}")
308
- else:
309
- logging.warning(f"The file {output_fn} is empty.")
310
- else:
311
- logging.error(f"The file {output_fn} does not exist.")
312
-
313
  end = time.time()
314
  total_time = round((end - start) / 60, 2)
315
  logging.info(f"Total time taken for Aligning: {total_time} mins")
316
-
317
  return output_fn
318
  def align(text_pair):
319
 
@@ -350,7 +317,6 @@ def align(text_pair):
350
  except Exception as e:
351
  logging.error(f"Error in creating TM repository: {e}")
352
  return {"error": f"Error in repository creation: {e}"}
353
-
354
  class EndpointHandler():
355
  def __init__(self, path=""):
356
  self.path = path
@@ -364,5 +330,4 @@ class EndpointHandler():
364
  A :obj:`list`:. The list contains the embeddings of the inference inputs
365
  """
366
  data = data.pop("inputs",data)
367
- return align(data)
368
-
 
18
  import requests
19
  logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
20
  # Git clone command
21
+ git_clone_command = "git clone https://github.com/TenzinGayche/bertalign.git"
22
 
23
  # Run the command using subprocess
24
  try:
 
26
  print("Git clone successful!")
27
  except subprocess.CalledProcessError as e:
28
  print(f"Error while running Git clone command: {e}")
29
+ from bertalign import Bertalign
30
 
31
 
 
 
 
 
 
32
  import requests
33
 
34
  GITHUB_USERNAME = "pechawa"
 
42
  DEBUG = True
43
 
44
  quiet = "-q" if DEBUG else ""
45
+ import subprocess
46
+ import logging
 
 
 
 
 
 
 
 
 
 
47
 
48
  def create_github_repo(repo_path: Path, repo_name: str, version: str, realign: bool):
49
  logging.info("[INFO] Creating GitHub repo...")
 
100
  subprocess.run(f"git push -u origin {branch_name}".split(), cwd=str(repo_path))
101
 
102
  return f"Branch '{branch_name}' updated in {repo_name}" if realign else response.json()["html_url"]
103
+
104
  def convert_raw_align_to_tm(align_fn: Path, tm_path: Path):
105
  if DEBUG:
106
  logging.debug("[INFO] Conerting raw alignment to TM repo...")
 
260
  def _run_align_script(bo_fn, en_fn, output_dir):
261
  start = time.time()
262
 
263
+ # Configure logging
264
+ logging.basicConfig(level=logging.INFO)
265
+
266
+ # Read the text from the files
267
+ bo_text = Path(bo_fn).read_text(encoding='utf-8')
268
+ en_text = Path(en_fn).read_text(encoding='utf-8')
269
+
270
+ # Initialize the aligner and align sentences
271
+ aligner = Bertalign(bo_text, en_text, "bo", "en")
272
+ aligner.align_sents()
273
+ result = aligner.return_tsv()
274
+
275
+ # Prepare the output filename and write the result
276
+ output_fn = Path(output_dir).joinpath('result.txt') # Ensures correct path handling
277
+ with open(output_fn, "w", encoding='utf-8') as f:
278
+ f.write(result)
279
+
280
+ # Calculate and log the time taken
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  end = time.time()
282
  total_time = round((end - start) / 60, 2)
283
  logging.info(f"Total time taken for Aligning: {total_time} mins")
 
284
  return output_fn
285
  def align(text_pair):
286
 
 
317
  except Exception as e:
318
  logging.error(f"Error in creating TM repository: {e}")
319
  return {"error": f"Error in repository creation: {e}"}
 
320
  class EndpointHandler():
321
  def __init__(self, path=""):
322
  self.path = path
 
330
  A :obj:`list`:. The list contains the embeddings of the inference inputs
331
  """
332
  data = data.pop("inputs",data)
333
+ return align(data)