Instructions to use openpecha/aligner with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use openpecha/aligner with Transformers:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("openpecha/aligner")
model = AutoModelForSeq2SeqLM.from_pretrained("openpecha/aligner")
- Notebooks
- Google Colab
- Kaggle
Update handler.py
Browse files
- handler.py +34 -8
handler.py
CHANGED
|
@@ -256,17 +256,43 @@ def _run_align_script(bo_fn, en_fn, output_dir):
|
|
| 256 |
logging.info(f"Total time taken for Aligning: {total_time} mins")
|
| 257 |
return output_fn
|
| 258 |
def align(text_pair):
|
|
|
|
| 259 |
logging.info(f"Running aligner for TM{text_pair['text_id']}...")
|
| 260 |
with TemporaryDirectory() as tmpdir:
|
| 261 |
output_dir = Path(tmpdir)
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
class EndpointHandler():
|
| 272 |
def __init__(self, path=""):
|
|
|
|
| 256 |
logging.info(f"Total time taken for Aligning: {total_time} mins")
|
| 257 |
return output_fn
|
| 258 |
def align(text_pair):
    """Download a Tibetan/English text pair, align it, and publish the result.

    Args:
        text_pair: Mapping that provides ``text_id``, ``bo_file_url`` and
            ``en_file_url``; it is also forwarded unchanged to ``create_tm``.

    Returns:
        ``{"tm_repo_url": <value returned by create_tm>}`` on success, or
        ``{"error": <message>}`` when a downloaded file is missing or empty,
        when the alignment script raises, or when TM repository creation
        raises.
    """
    logging.info(f"Running aligner for TM{text_pair['text_id']}...")
    with TemporaryDirectory() as tmpdir:
        output_dir = Path(tmpdir)

        # Download files and validate them
        bo_fn = download_file(text_pair["bo_file_url"], output_fn=output_dir / "bo.txt")
        en_fn = download_file(text_pair["en_file_url"], output_fn=output_dir / "en.txt")

        # Check if files are downloaded correctly (Tibetan first, then English)
        for label, fn in (("Tibetan", bo_fn), ("English", en_fn)):
            if not fn.exists() or fn.stat().st_size == 0:
                logging.error(f"Failed to download or empty file: {fn}")
                return {"error": f"Failed to download {label} file or file is empty"}

        # Log the start of each file for verification. Only the first 100
        # characters are read, decoded explicitly as UTF-8 with replacement,
        # so a large or badly encoded download cannot make this purely
        # diagnostic step raise and abort the alignment.
        for fn in (bo_fn, en_fn):
            with fn.open(encoding="utf-8", errors="replace") as f:
                preview = f.read(100)
            logging.info(f"Content of {fn.name}: {preview}...")

        # Run alignment script
        try:
            aligned_fn = _run_align_script(bo_fn, en_fn, output_dir)
            logging.info(f"Alignment script output file: {aligned_fn}")
        except Exception as e:
            # logging.exception keeps the traceback alongside the message.
            logging.exception(f"Alignment script error: {e}")
            return {"error": f"Alignment script failed: {e}"}

        # Create TM repository while the temporary directory still exists,
        # because the aligned file lives inside it.
        try:
            repo_url = create_tm(aligned_fn, text_pair=text_pair)
            return {"tm_repo_url": repo_url}
        except Exception as e:
            logging.exception(f"Error in creating TM repository: {e}")
            return {"error": f"Error in repository creation: {e}"}
|
| 296 |
|
| 297 |
class EndpointHandler():
|
| 298 |
def __init__(self, path=""):
|