Spaces:
Sleeping
Sleeping
Commit ·
affc051
1
Parent(s): 5fc502d
EasyOCR + NLLB optimizado
Browse files- .gitignore +0 -3
- Dockerfile +0 -70
- app.py +45 -88
- requirements.txt +1 -2
.gitignore
CHANGED
|
@@ -1,9 +1,6 @@
|
|
| 1 |
__pycache__/
|
| 2 |
*.pyc
|
| 3 |
|
| 4 |
-
# CTranslate2 bihurtutako NLLB modeloa (Docker build-ean sortzen da)
|
| 5 |
-
nllb-200-distilled-600M-ct2-int8/
|
| 6 |
-
|
| 7 |
# HuggingFace cachea (modeloak deskargatzean)
|
| 8 |
.cache/
|
| 9 |
huggingface/
|
|
|
|
| 1 |
__pycache__/
|
| 2 |
*.pyc
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
# HuggingFace cachea (modeloak deskargatzean)
|
| 5 |
.cache/
|
| 6 |
huggingface/
|
Dockerfile
CHANGED
|
@@ -14,53 +14,6 @@ WORKDIR /app
|
|
| 14 |
COPY requirements.txt .
|
| 15 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 16 |
|
| 17 |
-
# CTranslate2-ren .so-ari "executable stack" bandera kendu (kernel berriek errefusatzen dute).
|
| 18 |
-
# Debian trixie-k ez du execstack paketea, beraz Python script batekin egiten dugu:
|
| 19 |
-
# PT_GNU_STACK program-headerraren p_flags-eko PF_X bita (0x1) zerora ezarri.
|
| 20 |
-
COPY <<'EOF' /tmp/fix_execstack.py
|
| 21 |
-
import os, struct, sys
|
| 22 |
-
|
| 23 |
-
base = '/usr/local/lib/python3.11/site-packages/ctranslate2'
|
| 24 |
-
PT_GNU_STACK_LE = b'\x51\xe5\x74\x64' # 0x6474e551 little-endian
|
| 25 |
-
total = 0
|
| 26 |
-
for root, _, files in os.walk(base):
|
| 27 |
-
for fname in files:
|
| 28 |
-
if not (fname.endswith('.so') or '.so.' in fname):
|
| 29 |
-
continue
|
| 30 |
-
path = os.path.join(root, fname)
|
| 31 |
-
with open(path, 'rb') as fp:
|
| 32 |
-
data = bytearray(fp.read())
|
| 33 |
-
changed = False
|
| 34 |
-
i = 0
|
| 35 |
-
while True:
|
| 36 |
-
i = data.find(PT_GNU_STACK_LE, i)
|
| 37 |
-
if i < 0:
|
| 38 |
-
break
|
| 39 |
-
# ELF64 program header: p_type(4) p_flags(4) ...
|
| 40 |
-
flags_off = i + 4
|
| 41 |
-
(flags,) = struct.unpack_from('<I', data, flags_off)
|
| 42 |
-
if flags & 0x1:
|
| 43 |
-
struct.pack_into('<I', data, flags_off, flags & ~0x1)
|
| 44 |
-
changed = True
|
| 45 |
-
total += 1
|
| 46 |
-
print(f'[fix_execstack] {path} offset {i} flags {flags:#x} -> {flags & ~0x1:#x}')
|
| 47 |
-
i += 4
|
| 48 |
-
if changed:
|
| 49 |
-
with open(path, 'wb') as fp:
|
| 50 |
-
fp.write(bytes(data))
|
| 51 |
-
print(f'[fix_execstack] Aldaketak: {total}')
|
| 52 |
-
EOF
|
| 53 |
-
RUN python /tmp/fix_execstack.py && rm /tmp/fix_execstack.py
|
| 54 |
-
|
| 55 |
-
# NLLB-200 CTranslate2 formatura bihurtu (INT8 kuantizazioa CPUrako)
|
| 56 |
-
# Build-denboran egiten da: irudia handiagoa baina abiaraztea askoz azkarragoa
|
| 57 |
-
RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('facebook/nllb-200-distilled-600M')" && \
|
| 58 |
-
ct2-transformers-converter \
|
| 59 |
-
--model facebook/nllb-200-distilled-600M \
|
| 60 |
-
--output_dir /app/nllb-200-distilled-600M-ct2-int8 \
|
| 61 |
-
--quantization int8 \
|
| 62 |
-
--force
|
| 63 |
-
|
| 64 |
# Aplikazioaren kodea kopiatu
|
| 65 |
COPY app.py .
|
| 66 |
|
|
@@ -69,26 +22,3 @@ EXPOSE 7860
|
|
| 69 |
|
| 70 |
# Zerbitzaria abiarazi
|
| 71 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
#FROM python:3.11-slim
|
| 77 |
-
|
| 78 |
-
#RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 79 |
-
# libglib2.0-0 \
|
| 80 |
-
# libgl1 \
|
| 81 |
-
# libgomp1 \
|
| 82 |
-
# libgthread-2.0-0 \
|
| 83 |
-
# && rm -rf /var/lib/apt/lists/*
|
| 84 |
-
|
| 85 |
-
#WORKDIR /app
|
| 86 |
-
|
| 87 |
-
#COPY requirements.txt .
|
| 88 |
-
#RUN pip install --no-cache-dir -r requirements.txt
|
| 89 |
-
|
| 90 |
-
#COPY app.py .
|
| 91 |
-
|
| 92 |
-
#EXPOSE 7860
|
| 93 |
-
|
| 94 |
-
#CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 14 |
COPY requirements.txt .
|
| 15 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Aplikazioaren kodea kopiatu
|
| 18 |
COPY app.py .
|
| 19 |
|
|
|
|
| 22 |
|
| 23 |
# Zerbitzaria abiarazi
|
| 24 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -3,7 +3,6 @@ app.py — FastAPI + EasyOCR + Gemini + NLLB + HiTZ zerbitzaria
|
|
| 3 |
OCR + postzuzenketa (Gemini 2.5 Flash) + itzulpena (NLLB-200 + HiTZ Marian).
|
| 4 |
"""
|
| 5 |
|
| 6 |
-
import asyncio
|
| 7 |
import io
|
| 8 |
import logging
|
| 9 |
import os
|
|
@@ -11,18 +10,22 @@ import re
|
|
| 11 |
import time
|
| 12 |
from contextlib import asynccontextmanager
|
| 13 |
|
| 14 |
-
import ctranslate2
|
| 15 |
import easyocr
|
| 16 |
import httpx
|
| 17 |
import numpy as np
|
| 18 |
-
import torch
|
| 19 |
from deskew import determine_skew
|
| 20 |
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
| 21 |
from fastapi.middleware.cors import CORSMiddleware
|
| 22 |
from fastapi.responses import JSONResponse
|
| 23 |
from PIL import Image
|
| 24 |
from skimage.transform import rotate
|
| 25 |
-
from transformers import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
logging.basicConfig(level=logging.INFO)
|
| 28 |
logger = logging.getLogger(__name__)
|
|
@@ -74,13 +77,6 @@ Corrected text:"""
|
|
| 74 |
readers: dict = {}
|
| 75 |
|
| 76 |
NLLB_MODEL_NAME = "facebook/nllb-200-distilled-600M"
|
| 77 |
-
# Modeloaren karpeta app.py-ren ondoan dago (lokalean zein Docker barruan)
|
| 78 |
-
NLLB_CT2_DIR = os.path.join(
|
| 79 |
-
os.path.dirname(os.path.abspath(__file__)),
|
| 80 |
-
"nllb-200-distilled-600M-ct2-int8",
|
| 81 |
-
)
|
| 82 |
-
CT2_INTRA_THREADS = int(os.environ.get("CT2_INTRA_THREADS", "2"))
|
| 83 |
-
CT2_INTER_THREADS = int(os.environ.get("CT2_INTER_THREADS", "1"))
|
| 84 |
|
| 85 |
HITZ_PAIRS = {
|
| 86 |
("en", "eu"): "HiTZ/mt-hitz-en-eu",
|
|
@@ -144,7 +140,7 @@ ISO_TO_NLLB = {
|
|
| 144 |
"zu": "zul_Latn",
|
| 145 |
}
|
| 146 |
|
| 147 |
-
|
| 148 |
nllb_tokenizer = None
|
| 149 |
hitz_models: dict = {}
|
| 150 |
|
|
@@ -309,23 +305,8 @@ def _adaptive_max_tokens(sentence: str) -> int:
|
|
| 309 |
return min(512, max(32, int(approx_src_tokens * 1.8)))
|
| 310 |
|
| 311 |
|
| 312 |
-
_TRANSLATION_CACHE: "dict[tuple[str, str, str], str]" = {}
|
| 313 |
-
_TRANSLATION_CACHE_MAX = 1024
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
def _cache_get(sentence: str, src: str, tgt: str) -> str | None:
|
| 317 |
-
return _TRANSLATION_CACHE.get((sentence, src, tgt))
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
def _cache_put(sentence: str, src: str, tgt: str, value: str) -> None:
|
| 321 |
-
if len(_TRANSLATION_CACHE) >= _TRANSLATION_CACHE_MAX:
|
| 322 |
-
# FIFO sinplea: lehen sartutakoa kendu
|
| 323 |
-
_TRANSLATION_CACHE.pop(next(iter(_TRANSLATION_CACHE)))
|
| 324 |
-
_TRANSLATION_CACHE[(sentence, src, tgt)] = value
|
| 325 |
-
|
| 326 |
-
|
| 327 |
def _nllb_translate(text: str, src_nllb: str, tgt_nllb: str) -> str:
|
| 328 |
-
"""NLLB-200 ereduarekin itzuli
|
| 329 |
if not text.strip():
|
| 330 |
return text
|
| 331 |
blocks = _flatten_to_sentences(text)
|
|
@@ -333,51 +314,37 @@ def _nllb_translate(text: str, src_nllb: str, tgt_nllb: str) -> str:
|
|
| 333 |
if not to_translate:
|
| 334 |
return text
|
| 335 |
|
| 336 |
-
logger.info("[NLLB
|
| 337 |
t0 = time.time()
|
| 338 |
-
|
| 339 |
nllb_tokenizer.src_lang = src_nllb
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
# 1) Cache-tik bete daitezkeenak bete
|
| 343 |
-
pending: list[tuple[int, str]] = []
|
| 344 |
-
cache_hits = 0
|
| 345 |
-
for i, sentence in enumerate(to_translate):
|
| 346 |
-
cached = _cache_get(sentence, src_nllb, tgt_nllb)
|
| 347 |
-
if cached is not None:
|
| 348 |
-
translations[i] = cached
|
| 349 |
-
cache_hits += 1
|
| 350 |
-
else:
|
| 351 |
-
pending.append((i, sentence))
|
| 352 |
|
| 353 |
-
|
| 354 |
BATCH = 8
|
| 355 |
-
for
|
| 356 |
-
chunk =
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
]
|
| 362 |
-
max_len = max(_adaptive_max_tokens(s) for s in chunk_sentences)
|
| 363 |
-
results = nllb_translator.translate_batch(
|
| 364 |
-
source_tokens_batch,
|
| 365 |
-
target_prefix=[[tgt_nllb]] * len(chunk_sentences),
|
| 366 |
-
beam_size=1,
|
| 367 |
-
max_decoding_length=max_len,
|
| 368 |
-
no_repeat_ngram_size=3,
|
| 369 |
)
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
|
| 383 |
def _hitz_translate(text: str, src: str, tgt: str) -> str:
|
|
@@ -436,19 +403,14 @@ async def lifespan(app: FastAPI):
|
|
| 436 |
logger.info("Reader kargatzen (quantize=True): %s %s", name, langs)
|
| 437 |
readers[name] = easyocr.Reader(langs, gpu=False, quantize=True)
|
| 438 |
|
| 439 |
-
global
|
| 440 |
-
logger.info("[LOAD] NLLB
|
| 441 |
nllb_tokenizer = AutoTokenizer.from_pretrained(NLLB_MODEL_NAME)
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
intra_threads=CT2_INTRA_THREADS,
|
| 448 |
-
inter_threads=CT2_INTER_THREADS,
|
| 449 |
-
)
|
| 450 |
-
logger.info("[LOAD] NLLB-CT2 prest | intra=%d inter=%d",
|
| 451 |
-
CT2_INTRA_THREADS, CT2_INTER_THREADS)
|
| 452 |
|
| 453 |
# HiTZ aldi baterako desaktibatuta (transformers bateragarritasun arazoak)
|
| 454 |
logger.info("[LOAD] HiTZ karga saltatzen (NLLB soilik modua)")
|
|
@@ -462,7 +424,6 @@ async def lifespan(app: FastAPI):
|
|
| 462 |
yield
|
| 463 |
readers.clear()
|
| 464 |
hitz_models.clear()
|
| 465 |
-
_TRANSLATION_CACHE.clear()
|
| 466 |
|
| 467 |
|
| 468 |
app = FastAPI(title="OCR + Itzulpena API", version="16.0.0", lifespan=lifespan)
|
|
@@ -480,9 +441,8 @@ async def health_check():
|
|
| 480 |
"status": "ok",
|
| 481 |
"scripts": list(readers.keys()),
|
| 482 |
"gemini": bool(GEMINI_API_KEY),
|
| 483 |
-
"nllb":
|
| 484 |
-
"
|
| 485 |
-
"translation_cache_size": len(_TRANSLATION_CACHE),
|
| 486 |
"hitz_pairs": [f"{s}-{t}" for (s, t) in hitz_models.keys()],
|
| 487 |
}
|
| 488 |
|
|
@@ -508,10 +468,7 @@ async def predict(
|
|
| 508 |
img_array = np.array(pil_image)
|
| 509 |
img_array = _deskew(img_array)
|
| 510 |
reader = readers[script]
|
| 511 |
-
|
| 512 |
-
results = await loop.run_in_executor(
|
| 513 |
-
None, lambda: reader.readtext(img_array, detail=1, paragraph=False)
|
| 514 |
-
)
|
| 515 |
raw_text = _group_into_lines(results)
|
| 516 |
logger.info("[OCR] Egina %.1fs-tan, %d karaktere", time.time() - t0, len(raw_text))
|
| 517 |
|
|
|
|
| 3 |
OCR + postzuzenketa (Gemini 2.5 Flash) + itzulpena (NLLB-200 + HiTZ Marian).
|
| 4 |
"""
|
| 5 |
|
|
|
|
| 6 |
import io
|
| 7 |
import logging
|
| 8 |
import os
|
|
|
|
| 10 |
import time
|
| 11 |
from contextlib import asynccontextmanager
|
| 12 |
|
|
|
|
| 13 |
import easyocr
|
| 14 |
import httpx
|
| 15 |
import numpy as np
|
| 16 |
+
import torch
|
| 17 |
from deskew import determine_skew
|
| 18 |
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
| 19 |
from fastapi.middleware.cors import CORSMiddleware
|
| 20 |
from fastapi.responses import JSONResponse
|
| 21 |
from PIL import Image
|
| 22 |
from skimage.transform import rotate
|
| 23 |
+
from transformers import (
|
| 24 |
+
AutoModelForSeq2SeqLM,
|
| 25 |
+
AutoTokenizer,
|
| 26 |
+
MarianMTModel,
|
| 27 |
+
MarianTokenizer,
|
| 28 |
+
)
|
| 29 |
|
| 30 |
logging.basicConfig(level=logging.INFO)
|
| 31 |
logger = logging.getLogger(__name__)
|
|
|
|
| 77 |
readers: dict = {}
|
| 78 |
|
| 79 |
NLLB_MODEL_NAME = "facebook/nllb-200-distilled-600M"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
HITZ_PAIRS = {
|
| 82 |
("en", "eu"): "HiTZ/mt-hitz-en-eu",
|
|
|
|
| 140 |
"zu": "zul_Latn",
|
| 141 |
}
|
| 142 |
|
| 143 |
+
nllb_model = None
|
| 144 |
nllb_tokenizer = None
|
| 145 |
hitz_models: dict = {}
|
| 146 |
|
|
|
|
| 305 |
return min(512, max(32, int(approx_src_tokens * 1.8)))
|
| 306 |
|
| 307 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
def _nllb_translate(text: str, src_nllb: str, tgt_nllb: str) -> str:
|
| 309 |
+
"""NLLB-200 ereduarekin itzuli, esaldika eta batch-ean."""
|
| 310 |
if not text.strip():
|
| 311 |
return text
|
| 312 |
blocks = _flatten_to_sentences(text)
|
|
|
|
| 314 |
if not to_translate:
|
| 315 |
return text
|
| 316 |
|
| 317 |
+
logger.info("[NLLB] %s -> %s | %d esaldi", src_nllb, tgt_nllb, len(to_translate))
|
| 318 |
t0 = time.time()
|
|
|
|
| 319 |
nllb_tokenizer.src_lang = src_nllb
|
| 320 |
+
forced_bos = nllb_tokenizer.convert_tokens_to_ids(tgt_nllb)
|
| 321 |
+
logger.info("[NLLB] forced_bos_token_id(%s) = %s", tgt_nllb, forced_bos)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
+
translations = []
|
| 324 |
BATCH = 8
|
| 325 |
+
for i in range(0, len(to_translate), BATCH):
|
| 326 |
+
chunk = to_translate[i:i + BATCH]
|
| 327 |
+
max_new = max(_adaptive_max_tokens(s) for s in chunk)
|
| 328 |
+
inputs = nllb_tokenizer(
|
| 329 |
+
chunk, return_tensors="pt", padding=True,
|
| 330 |
+
truncation=True, max_length=512,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
)
|
| 332 |
+
with torch.no_grad():
|
| 333 |
+
outputs = nllb_model.generate(
|
| 334 |
+
**inputs,
|
| 335 |
+
forced_bos_token_id=forced_bos,
|
| 336 |
+
max_new_tokens=max_new,
|
| 337 |
+
num_beams=2,
|
| 338 |
+
no_repeat_ngram_size=3,
|
| 339 |
+
early_stopping=True,
|
| 340 |
+
)
|
| 341 |
+
decoded = nllb_tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 342 |
+
for src_s, out_s in zip(chunk, decoded):
|
| 343 |
+
logger.info("[NLLB] %r -> %r", src_s[:60], out_s[:60])
|
| 344 |
+
translations.extend(decoded)
|
| 345 |
+
|
| 346 |
+
logger.info("[NLLB] Egina %.1fs-tan", time.time() - t0)
|
| 347 |
+
return _rebuild(blocks, [t.strip() for t in translations])
|
| 348 |
|
| 349 |
|
| 350 |
def _hitz_translate(text: str, src: str, tgt: str) -> str:
|
|
|
|
| 403 |
logger.info("Reader kargatzen (quantize=True): %s %s", name, langs)
|
| 404 |
readers[name] = easyocr.Reader(langs, gpu=False, quantize=True)
|
| 405 |
|
| 406 |
+
global nllb_model, nllb_tokenizer
|
| 407 |
+
logger.info("[LOAD] NLLB eredua kargatzen: %s", NLLB_MODEL_NAME)
|
| 408 |
nllb_tokenizer = AutoTokenizer.from_pretrained(NLLB_MODEL_NAME)
|
| 409 |
+
nllb_model = AutoModelForSeq2SeqLM.from_pretrained(NLLB_MODEL_NAME)
|
| 410 |
+
nllb_model.eval()
|
| 411 |
+
logger.info("[LOAD] NLLB mota: %s | tokenizer: %s",
|
| 412 |
+
nllb_model.__class__.__name__,
|
| 413 |
+
nllb_tokenizer.__class__.__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
|
| 415 |
# HiTZ aldi baterako desaktibatuta (transformers bateragarritasun arazoak)
|
| 416 |
logger.info("[LOAD] HiTZ karga saltatzen (NLLB soilik modua)")
|
|
|
|
| 424 |
yield
|
| 425 |
readers.clear()
|
| 426 |
hitz_models.clear()
|
|
|
|
| 427 |
|
| 428 |
|
| 429 |
app = FastAPI(title="OCR + Itzulpena API", version="16.0.0", lifespan=lifespan)
|
|
|
|
| 441 |
"status": "ok",
|
| 442 |
"scripts": list(readers.keys()),
|
| 443 |
"gemini": bool(GEMINI_API_KEY),
|
| 444 |
+
"nllb": nllb_model is not None,
|
| 445 |
+
"nllb_class": nllb_model.__class__.__name__ if nllb_model else None,
|
|
|
|
| 446 |
"hitz_pairs": [f"{s}-{t}" for (s, t) in hitz_models.keys()],
|
| 447 |
}
|
| 448 |
|
|
|
|
| 468 |
img_array = np.array(pil_image)
|
| 469 |
img_array = _deskew(img_array)
|
| 470 |
reader = readers[script]
|
| 471 |
+
results = reader.readtext(img_array, detail=1, paragraph=False)
|
|
|
|
|
|
|
|
|
|
| 472 |
raw_text = _group_into_lines(results)
|
| 473 |
logger.info("[OCR] Egina %.1fs-tan, %d karaktere", time.time() - t0, len(raw_text))
|
| 474 |
|
requirements.txt
CHANGED
|
@@ -10,5 +10,4 @@ httpx==0.27.0
|
|
| 10 |
transformers==4.37.0
|
| 11 |
torch==2.2.2
|
| 12 |
sentencepiece==0.2.0
|
| 13 |
-
sacremoses==0.1.1
|
| 14 |
-
ctranslate2==4.5.0
|
|
|
|
| 10 |
transformers==4.37.0
|
| 11 |
torch==2.2.2
|
| 12 |
sentencepiece==0.2.0
|
| 13 |
+
sacremoses==0.1.1
|
|
|