Update app.py
Browse files
app.py
CHANGED
|
@@ -211,13 +211,19 @@ def esm2_embed(seq: str, model_name: str = "facebook/esm2_t6_8M_UR50D") -> Dict:
|
|
| 211 |
model.eval()
|
| 212 |
|
| 213 |
with torch.no_grad():
|
| 214 |
-
inputs = tokenizer(seq, return_tensors="pt")
|
| 215 |
outputs = model(**inputs, output_hidden_states=True)
|
| 216 |
hidden = outputs.hidden_states[-1].mean(dim=1).squeeze(0)
|
| 217 |
-
vec = hidden.numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
return {
|
| 220 |
-
"embedding": vec.tolist(),
|
| 221 |
"size": vec.shape[0]
|
| 222 |
}
|
| 223 |
except Exception as e:
|
|
@@ -229,6 +235,12 @@ def dna_embed(seq: str, model_name: str = "zhihan1996/DNABERT-2-117M") -> Dict:
|
|
| 229 |
return {"error": "PyTorch/Transformers not available"}
|
| 230 |
|
| 231 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 233 |
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
|
| 234 |
model.eval()
|
|
@@ -237,10 +249,16 @@ def dna_embed(seq: str, model_name: str = "zhihan1996/DNABERT-2-117M") -> Dict:
|
|
| 237 |
inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=512)
|
| 238 |
outputs = model(**inputs)
|
| 239 |
hidden = outputs.last_hidden_state.mean(dim=1).squeeze(0)
|
| 240 |
-
vec = hidden.numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
return {
|
| 243 |
-
"embedding": vec.tolist(),
|
| 244 |
"size": vec.shape[0]
|
| 245 |
}
|
| 246 |
except Exception as e:
|
|
|
|
| 211 |
model.eval()
|
| 212 |
|
| 213 |
with torch.no_grad():
|
| 214 |
+
inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=1024)
|
| 215 |
outputs = model(**inputs, output_hidden_states=True)
|
| 216 |
hidden = outputs.hidden_states[-1].mean(dim=1).squeeze(0)
|
| 217 |
+
vec = hidden.cpu().numpy()
|
| 218 |
+
|
| 219 |
+
# Memory cleanup
|
| 220 |
+
del model
|
| 221 |
+
del tokenizer
|
| 222 |
+
if torch.cuda.is_available():
|
| 223 |
+
torch.cuda.empty_cache()
|
| 224 |
|
| 225 |
return {
|
| 226 |
+
"embedding": vec.tolist()[:10], # first 10 values only, for preview
|
| 227 |
"size": vec.shape[0]
|
| 228 |
}
|
| 229 |
except Exception as e:
|
|
|
|
| 235 |
return {"error": "PyTorch/Transformers not available"}
|
| 236 |
|
| 237 |
try:
|
| 238 |
+
# Check for einops
|
| 239 |
+
try:
|
| 240 |
+
import einops
|
| 241 |
+
except ImportError:
|
| 242 |
+
return {"error": "einops package required. Installing..."}
|
| 243 |
+
|
| 244 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 245 |
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
|
| 246 |
model.eval()
|
|
|
|
| 249 |
inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=512)
|
| 250 |
outputs = model(**inputs)
|
| 251 |
hidden = outputs.last_hidden_state.mean(dim=1).squeeze(0)
|
| 252 |
+
vec = hidden.cpu().numpy()
|
| 253 |
+
|
| 254 |
+
# Memory cleanup
|
| 255 |
+
del model
|
| 256 |
+
del tokenizer
|
| 257 |
+
if torch.cuda.is_available():
|
| 258 |
+
torch.cuda.empty_cache()
|
| 259 |
|
| 260 |
return {
|
| 261 |
+
"embedding": vec.tolist()[:10], # first 10 values only, for preview
|
| 262 |
"size": vec.shape[0]
|
| 263 |
}
|
| 264 |
except Exception as e:
|