Lev Israel committed
Commit · 28c483a
Parent(s): 2575879
Float 16, and better polling
app.py
CHANGED
@@ -587,14 +587,24 @@ def create_app():
     if (window.jobPollInterval) {
         clearInterval(window.jobPollInterval);
     }
+    console.log('[Auto-poll] Starting polling every 5 seconds');
     // Auto-click the check status button every 5 seconds
     window.jobPollInterval = setInterval(() => {
-
-
+        // Find button by looking for "Check Status" text
+        const buttons = document.querySelectorAll('button');
+        let checkBtn = null;
+        for (const btn of buttons) {
+            if (btn.textContent.includes('Check Status')) {
+                checkBtn = btn;
+                break;
+            }
+        }
         if (checkBtn && checkBtn.offsetParent !== null) {
+            console.log('[Auto-poll] Clicking Check Status button');
            checkBtn.click();
         } else {
             // Button is hidden (job done), stop polling
+            console.log('[Auto-poll] Button not visible, stopping');
             clearInterval(window.jobPollInterval);
             window.jobPollInterval = null;
         }
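For context, load-time JS like this is usually handed to Gradio when the Blocks app is built. Below is a minimal sketch of that wiring, assuming the app uses gr.Blocks and its js parameter; POLL_JS, the bare demo UI, and the body of create_app here are illustrative stand-ins, not the real app.py. The offsetParent !== null test is a standard visibility check: offsetParent is null while the element (or an ancestor) is display: none, which is how the hidden button of a finished job ends the loop.

# Minimal sketch, not the app's actual wiring: attach load-time JS to a
# Gradio Blocks app via the `js` parameter. POLL_JS and create_app's body
# are illustrative stand-ins for the real app.py.
import gradio as gr

POLL_JS = """
() => {
    if (window.jobPollInterval) {
        clearInterval(window.jobPollInterval);
    }
    window.jobPollInterval = setInterval(() => {
        // Look the button up by its visible text on every tick: Gradio
        // re-renders components, so a node grabbed once can go stale.
        const buttons = document.querySelectorAll('button');
        let checkBtn = null;
        for (const btn of buttons) {
            if (btn.textContent.includes('Check Status')) {
                checkBtn = btn;
                break;
            }
        }
        // offsetParent is null for hidden elements, so a hidden button
        // (job finished) stops the polling loop.
        if (checkBtn && checkBtn.offsetParent !== null) {
            checkBtn.click();
        } else {
            clearInterval(window.jobPollInterval);
            window.jobPollInterval = null;
        }
    }, 5000);
}
"""

def create_app():
    # `js` runs once on page load; the interval then drives the polling.
    with gr.Blocks(js=POLL_JS) as demo:
        gr.Button("Check Status")
    return demo

if __name__ == "__main__":
    create_app().launch()

Matching on the button's label rather than a cached node or element id is the robust choice here, since Gradio assigns component ids dynamically unless an elem_id is set explicitly.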
models.py
CHANGED
@@ -248,9 +248,17 @@ class EmbeddingModel(BaseEmbeddingModel):
             "passage_prefix": "",
         })
 
-        # Load the model
+        # Load the model with float16 on CUDA to save VRAM
+        # (12B model: float32 = 48GB, float16 = 24GB)
         print(f"Loading model: {model_id} on {device}")
-        self.model = SentenceTransformer(model_id, device=device)
+        if device == "cuda":
+            self.model = SentenceTransformer(
+                model_id,
+                device=device,
+                model_kwargs={"torch_dtype": torch.float16},
+            )
+        else:
+            self.model = SentenceTransformer(model_id, device=device)
 
         # Set max sequence length if supported
         if hasattr(self.model, "max_seq_length"):
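The VRAM figures in the new comment are just parameter count times bytes per parameter: 12e9 params × 4 bytes ≈ 48 GB in float32, versus 2 bytes per param ≈ 24 GB in float16. Below is a minimal standalone sketch of the same pattern, assuming a recent sentence-transformers release (where model_kwargs is forwarded to the underlying transformers from_pretrained call); the function name is illustrative, not from models.py.

# Minimal sketch of the same loading pattern outside the class; the
# function name and signature are illustrative, not from models.py.
import torch
from sentence_transformers import SentenceTransformer

def load_embedding_model(model_id: str, device: str) -> SentenceTransformer:
    # Weights-only footprint scales with bytes per parameter: a 12B-param
    # model needs ~48 GB in float32 (4 B/param) but ~24 GB in float16.
    if device == "cuda":
        # model_kwargs is forwarded to transformers' from_pretrained,
        # so torch_dtype loads the weights directly in half precision.
        return SentenceTransformer(
            model_id,
            device=device,
            model_kwargs={"torch_dtype": torch.float16},
        )
    # CPU (and MPS) keep the default float32; half-precision ops are
    # slow or unsupported on most CPUs.
    return SentenceTransformer(model_id, device=device)

Keeping the non-CUDA path at full precision is deliberate: the memory pressure this commit addresses only exists on the GPU.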