Lev Israel committed on
Commit
28c483a
·
1 Parent(s): 2575879

Float 16, and better polling

Browse files
Files changed (2) hide show
  1. app.py +12 -2
  2. models.py +10 -2
app.py CHANGED
@@ -587,14 +587,24 @@ def create_app():
587
  if (window.jobPollInterval) {
588
  clearInterval(window.jobPollInterval);
589
  }
 
590
  // Auto-click the check status button every 5 seconds
591
  window.jobPollInterval = setInterval(() => {
592
- const checkBtn = document.querySelector('button:has(span:contains("Check Status"))') ||
593
- Array.from(document.querySelectorAll('button')).find(b => b.textContent.includes('Check Status'));
 
 
 
 
 
 
 
594
  if (checkBtn && checkBtn.offsetParent !== null) {
 
595
  checkBtn.click();
596
  } else {
597
  // Button is hidden (job done), stop polling
 
598
  clearInterval(window.jobPollInterval);
599
  window.jobPollInterval = null;
600
  }
 
587
  if (window.jobPollInterval) {
588
  clearInterval(window.jobPollInterval);
589
  }
590
+ console.log('[Auto-poll] Starting polling every 5 seconds');
591
  // Auto-click the check status button every 5 seconds
592
  window.jobPollInterval = setInterval(() => {
593
+ // Find button by looking for "Check Status" text
594
+ const buttons = document.querySelectorAll('button');
595
+ let checkBtn = null;
596
+ for (const btn of buttons) {
597
+ if (btn.textContent.includes('Check Status')) {
598
+ checkBtn = btn;
599
+ break;
600
+ }
601
+ }
602
  if (checkBtn && checkBtn.offsetParent !== null) {
603
+ console.log('[Auto-poll] Clicking Check Status button');
604
  checkBtn.click();
605
  } else {
606
  // Button is hidden (job done), stop polling
607
+ console.log('[Auto-poll] Button not visible, stopping');
608
  clearInterval(window.jobPollInterval);
609
  window.jobPollInterval = null;
610
  }
models.py CHANGED
@@ -248,9 +248,17 @@ class EmbeddingModel(BaseEmbeddingModel):
248
  "passage_prefix": "",
249
  })
250
 
251
- # Load the model
 
252
  print(f"Loading model: {model_id} on {device}")
253
- self.model = SentenceTransformer(model_id, device=device)
 
 
 
 
 
 
 
254
 
255
  # Set max sequence length if supported
256
  if hasattr(self.model, "max_seq_length"):
 
248
  "passage_prefix": "",
249
  })
250
 
251
+ # Load the model with float16 on CUDA to save VRAM
252
+ # (12B model: float32 = 48GB, float16 = 24GB)
253
  print(f"Loading model: {model_id} on {device}")
254
+ if device == "cuda":
255
+ self.model = SentenceTransformer(
256
+ model_id,
257
+ device=device,
258
+ model_kwargs={"torch_dtype": torch.float16},
259
+ )
260
+ else:
261
+ self.model = SentenceTransformer(model_id, device=device)
262
 
263
  # Set max sequence length if supported
264
  if hasattr(self.model, "max_seq_length"):