Spaces:

felixbet
/

biobert-emb

Running

felixbet committed on Nov 6

Commit

8a7967b

verified ·

1 Parent(s): a2d010b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,34 +4,16 @@ from pydantic import BaseModel
 from typing import Any, List
 from transformers import BertTokenizer, BertConfig, TFBertModel
 MODEL_DIR = os.environ.get("MODEL_DIR", "/app/bert_tf")
-PORT = int(os.environ.get("PORT", "7860"))
 tok  = BertTokenizer(vocab_file=f"{MODEL_DIR}/vocab.txt", do_lower_case=True)
 cfg  = BertConfig.from_json_file(f"{MODEL_DIR}/config.json")
 model= TFBertModel.from_pretrained(MODEL_DIR, from_tf=True, config=cfg)
-def encode(texts: List[str]):
-    ins  = tok(texts, padding=True, truncation=True, return_tensors="tf", max_length=512)
-    outs = model(ins)[0]
-    mask = tf.cast(tf.expand_dims(ins["attention_mask"], -1), tf.float32)
-    mean = tf.reduce_sum(outs*mask, axis=1) / tf.maximum(tf.reduce_sum(mask, axis=1), 1.0)
-    return tf.linalg.l2_normalize(mean, axis=1).numpy().tolist()
-_ = encode(["warmup"])
-app = FastAPI()
-class EmbReq(BaseModel):
-    input: Any
-@app.get("/health")
-def health():
-    return {"ok": True}
-@app.post("/v1/embeddings")
-def embeddings(req: EmbReq):
-    texts = req.input if isinstance(req.input, list) else [req.input]
-    vecs  = encode(texts)
-    return {"object":"list","model":"biobert-tf-emb",
-            "data":[{"object":"embedding","index":i,"embedding":v} for i,v in enumerate(vecs)]}

 from typing import Any, List
 from transformers import BertTokenizer, BertConfig, TFBertModel
+# start.sh exports MODEL_DIR after normalization
 MODEL_DIR = os.environ.get("MODEL_DIR", "/app/bert_tf")
+# Fallback: if vocab.txt is not directly in MODEL_DIR, search MODEL_DIR's immediate subdirectories for one holding both vocab.txt and config.json
+if not os.path.isfile(os.path.join(MODEL_DIR, "vocab.txt")):
+    for d in [MODEL_DIR] + [os.path.join(MODEL_DIR, x) for x in os.listdir(MODEL_DIR) if os.path.isdir(os.path.join(MODEL_DIR, x))]:
+        if os.path.isfile(os.path.join(d, "vocab.txt")) and os.path.isfile(os.path.join(d, "config.json")):
+            MODEL_DIR = d
+            break
 tok  = BertTokenizer(vocab_file=f"{MODEL_DIR}/vocab.txt", do_lower_case=True)
 cfg  = BertConfig.from_json_file(f"{MODEL_DIR}/config.json")
 model= TFBertModel.from_pretrained(MODEL_DIR, from_tf=True, config=cfg)