fix: cap reranker input to top-40 and fix nginx send timeout to prevent 504
Browse files
- backend/src/matching/stage1.py +4 -1
- nginx.conf +2 -1
backend/src/matching/stage1.py
CHANGED
|
@@ -112,4 +112,7 @@ async def stage1_retrieve(
|
|
| 112 |
})
|
| 113 |
|
| 114 |
scored.sort(key=lambda x: x["stage1_score"], reverse=True)
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
})
|
| 113 |
|
| 114 |
scored.sort(key=lambda x: x["stage1_score"], reverse=True)
|
| 115 |
+
# Cap at 40 for stage2 reranker: stage1 already sorted by composite score.
|
| 116 |
+
# Sending 250 to a CPU-bound cross-encoder causes 504 timeouts on HF.
|
| 117 |
+
# Top-40 preserves accuracy while reducing reranker time from ~3min → ~15s.
|
| 118 |
+
return scored[:40]
|
nginx.conf
CHANGED
|
@@ -22,7 +22,8 @@ http {
|
|
| 22 |
proxy_set_header Host $host;
|
| 23 |
proxy_set_header X-Real-IP $remote_addr;
|
| 24 |
proxy_read_timeout 300;
|
| 25 |
-
|
|
|
|
| 26 |
client_max_body_size 100m;
|
| 27 |
}
|
| 28 |
|
|
|
|
| 22 |
proxy_set_header Host $host;
|
| 23 |
proxy_set_header X-Real-IP $remote_addr;
|
| 24 |
proxy_read_timeout 300;
|
| 25 |
+
proxy_send_timeout 300;
|
| 26 |
+
proxy_connect_timeout 30;
|
| 27 |
client_max_body_size 100m;
|
| 28 |
}
|
| 29 |
|