openfree committed on
Commit
16c1e8a
·
verified ·
1 Parent(s): 5f95b89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -5
app.py CHANGED
@@ -211,13 +211,19 @@ def esm2_embed(seq: str, model_name: str = "facebook/esm2_t6_8M_UR50D") -> Dict:
211
  model.eval()
212
 
213
  with torch.no_grad():
214
- inputs = tokenizer(seq, return_tensors="pt")
215
  outputs = model(**inputs, output_hidden_states=True)
216
  hidden = outputs.hidden_states[-1].mean(dim=1).squeeze(0)
217
- vec = hidden.numpy()
 
 
 
 
 
 
218
 
219
  return {
220
- "embedding": vec.tolist(),
221
  "size": vec.shape[0]
222
  }
223
  except Exception as e:
@@ -229,6 +235,12 @@ def dna_embed(seq: str, model_name: str = "zhihan1996/DNABERT-2-117M") -> Dict:
229
  return {"error": "PyTorch/Transformers not available"}
230
 
231
  try:
 
 
 
 
 
 
232
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
233
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
234
  model.eval()
@@ -237,10 +249,16 @@ def dna_embed(seq: str, model_name: str = "zhihan1996/DNABERT-2-117M") -> Dict:
237
  inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=512)
238
  outputs = model(**inputs)
239
  hidden = outputs.last_hidden_state.mean(dim=1).squeeze(0)
240
- vec = hidden.numpy()
 
 
 
 
 
 
241
 
242
  return {
243
- "embedding": vec.tolist(),
244
  "size": vec.shape[0]
245
  }
246
  except Exception as e:
 
211
  model.eval()
212
 
213
  with torch.no_grad():
214
+ inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=1024)
215
  outputs = model(**inputs, output_hidden_states=True)
216
  hidden = outputs.hidden_states[-1].mean(dim=1).squeeze(0)
217
+ vec = hidden.cpu().numpy()
218
+
219
+ # 메모리 정리
220
+ del model
221
+ del tokenizer
222
+ if torch.cuda.is_available():
223
+ torch.cuda.empty_cache()
224
 
225
  return {
226
+ "embedding": vec.tolist()[:10], # 미리보기용 첫 10개만
227
  "size": vec.shape[0]
228
  }
229
  except Exception as e:
 
235
  return {"error": "PyTorch/Transformers not available"}
236
 
237
  try:
238
+ # einops 체크
239
+ try:
240
+ import einops
241
+ except ImportError:
242
+ return {"error": "einops package required. Installing..."}
243
+
244
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
245
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
246
  model.eval()
 
249
  inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=512)
250
  outputs = model(**inputs)
251
  hidden = outputs.last_hidden_state.mean(dim=1).squeeze(0)
252
+ vec = hidden.cpu().numpy()
253
+
254
+ # 메모리 정리
255
+ del model
256
+ del tokenizer
257
+ if torch.cuda.is_available():
258
+ torch.cuda.empty_cache()
259
 
260
  return {
261
+ "embedding": vec.tolist()[:10], # 미리보기용 첫 10개만
262
  "size": vec.shape[0]
263
  }
264
  except Exception as e: