File size: 916 Bytes
842daa2
4198d6e
842daa2
 
 
4198d6e
 
8a7967b
842daa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4198d6e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
from transformers import BertTokenizer, BertConfig, TFBertModel
from fastapi import FastAPI

# FastAPI application object exposed by this module.
app = FastAPI()

# Root directory searched for the BERT files; overridable through the
# MODEL_DIR environment variable.
MODEL_DIR = os.environ.get("MODEL_DIR", "/app/bert_tf")

# Create the directory when it is missing so the listing below cannot fail on
# a non-existent path.
# NOTE(review): a freshly created (empty) directory still lacks vocab.txt and
# config.json, so the loading calls at the bottom will fail in that case.
os.makedirs(MODEL_DIR, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sort them so the same
# subdirectory wins on every run when more than one holds a usable model.
try:
    entries = sorted(os.listdir(MODEL_DIR))
except FileNotFoundError:
    # The directory vanished between makedirs() and listdir(); fall back to
    # probing the root only.
    entries = []

# Candidate locations: the root itself first, then each immediate
# subdirectory (one level deep only).
candidates = [MODEL_DIR]
candidates.extend(
    path
    for path in (os.path.join(MODEL_DIR, entry) for entry in entries)
    if os.path.isdir(path)
)

# Re-point MODEL_DIR at the first candidate that contains both required
# files; when none qualifies MODEL_DIR keeps its original value.
for candidate in candidates:
    has_vocab = os.path.isfile(os.path.join(candidate, "vocab.txt"))
    has_config = os.path.isfile(os.path.join(candidate, "config.json"))
    if has_vocab and has_config:
        MODEL_DIR = candidate
        break

# Load tokenizer, configuration and model weights from the resolved directory.
tok = BertTokenizer(
    vocab_file=os.path.join(MODEL_DIR, "vocab.txt"),
    do_lower_case=True,
)
cfg = BertConfig.from_json_file(os.path.join(MODEL_DIR, "config.json"))
# NOTE(review): from_tf=True is forwarded unchanged; confirm that
# TFBertModel.from_pretrained accepts it in the pinned transformers version.
model = TFBertModel.from_pretrained(MODEL_DIR, from_tf=True, config=cfg)