GitHub Actions committed on
Commit ·
6fd8157
1
Parent(s): dcd799a
Deploy from GitHub (3f6ecba)
Browse files
- .gitattributes +1 -1
- app.py +13 -8
- config.py +4 -1
- pyproject.toml +1 -1
- rag/feedback.py +20 -1
- static/app.js +67 -0
- static/index.html +1 -1
- templates/loading.html +1 -1
- uv.lock +1 -1
.gitattributes
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
|
|
| 1 |
data/*.json filter=lfs diff=lfs merge=lfs -text
|
| 2 |
data/*.db filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
data/*.faiss filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
data/*.faiss filter=lfs diff=lfs merge=lfs -text
|
| 2 |
data/*.json filter=lfs diff=lfs merge=lfs -text
|
| 3 |
data/*.db filter=lfs diff=lfs merge=lfs -text
|
|
|
app.py
CHANGED
|
@@ -189,14 +189,16 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
|
| 189 |
"""Spawn background pipeline loading so HTTP is available immediately."""
|
| 190 |
thread = threading.Thread(target=_load_pipeline_background, daemon=True)
|
| 191 |
thread.start()
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
| 197 |
yield
|
| 198 |
-
|
| 199 |
-
|
|
|
|
| 200 |
pipeline.clear()
|
| 201 |
pipeline_ready.clear()
|
| 202 |
|
|
@@ -350,9 +352,10 @@ def feedback_endpoint(
|
|
| 350 |
verse: str = Form(""),
|
| 351 |
score: float = Form(0.0),
|
| 352 |
feedback: str = Form(""),
|
|
|
|
| 353 |
) -> Response:
|
| 354 |
"""Record per-verse relevance feedback."""
|
| 355 |
-
if feedback not in ("up", "down"):
|
| 356 |
return Response(status_code=400)
|
| 357 |
try:
|
| 358 |
record_feedback(
|
|
@@ -365,6 +368,8 @@ def feedback_endpoint(
|
|
| 365 |
buffer_path=config.FEEDBACK_BUFFER_PATH,
|
| 366 |
flush_threshold=config.FEEDBACK_FLUSH_THRESHOLD,
|
| 367 |
hf_repo=config.FEEDBACK_HF_REPO,
|
|
|
|
|
|
|
| 368 |
)
|
| 369 |
except Exception:
|
| 370 |
logger.exception("Failed to record feedback")
|
|
|
|
| 189 |
"""Spawn background pipeline loading so HTTP is available immediately."""
|
| 190 |
thread = threading.Thread(target=_load_pipeline_background, daemon=True)
|
| 191 |
thread.start()
|
| 192 |
+
if config.FEEDBACK_ENV == "production":
|
| 193 |
+
start_flush_scheduler(
|
| 194 |
+
config.FEEDBACK_BUFFER_PATH,
|
| 195 |
+
config.FEEDBACK_HF_REPO,
|
| 196 |
+
config.FEEDBACK_FLUSH_INTERVAL_S,
|
| 197 |
+
)
|
| 198 |
yield
|
| 199 |
+
if config.FEEDBACK_ENV == "production":
|
| 200 |
+
stop_flush_scheduler()
|
| 201 |
+
flush_remaining(config.FEEDBACK_BUFFER_PATH, config.FEEDBACK_HF_REPO)
|
| 202 |
pipeline.clear()
|
| 203 |
pipeline_ready.clear()
|
| 204 |
|
|
|
|
| 352 |
verse: str = Form(""),
|
| 353 |
score: float = Form(0.0),
|
| 354 |
feedback: str = Form(""),
|
| 355 |
+
session_id: str = Form(""),
|
| 356 |
) -> Response:
|
| 357 |
"""Record per-verse relevance feedback."""
|
| 358 |
+
if feedback not in ("up", "down", "cancel_up", "cancel_down"):
|
| 359 |
return Response(status_code=400)
|
| 360 |
try:
|
| 361 |
record_feedback(
|
|
|
|
| 368 |
buffer_path=config.FEEDBACK_BUFFER_PATH,
|
| 369 |
flush_threshold=config.FEEDBACK_FLUSH_THRESHOLD,
|
| 370 |
hf_repo=config.FEEDBACK_HF_REPO,
|
| 371 |
+
source=config.FEEDBACK_ENV,
|
| 372 |
+
session_id=session_id,
|
| 373 |
)
|
| 374 |
except Exception:
|
| 375 |
logger.exception("Failed to record feedback")
|
config.py
CHANGED
|
@@ -50,7 +50,10 @@ MAX_QUERY_LENGTH: int = 300
|
|
| 50 |
CONTEXT_VERSES: int = 2
|
| 51 |
|
| 52 |
# Feedback
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
FEEDBACK_HF_REPO: str = os.environ.get("FEEDBACK_HF_REPO", "adedaran/rag-bible-feedback")
|
| 55 |
FEEDBACK_FLUSH_THRESHOLD: int = 5
|
| 56 |
FEEDBACK_FLUSH_INTERVAL_S: int = 300 # 5 minutes
|
|
|
|
| 50 |
CONTEXT_VERSES: int = 2
|
| 51 |
|
| 52 |
# Feedback
|
| 53 |
+
_is_production = bool(os.environ.get("SPACE_ID"))
|
| 54 |
+
FEEDBACK_ENV: str = os.environ.get("FEEDBACK_ENV", "production" if _is_production else "local")
|
| 55 |
+
_buffer_suffix = "" if FEEDBACK_ENV == "production" else f"_{FEEDBACK_ENV}"
|
| 56 |
+
FEEDBACK_BUFFER_PATH: Path = DATA_DIR / f"feedback_buffer{_buffer_suffix}.jsonl"
|
| 57 |
FEEDBACK_HF_REPO: str = os.environ.get("FEEDBACK_HF_REPO", "adedaran/rag-bible-feedback")
|
| 58 |
FEEDBACK_FLUSH_THRESHOLD: int = 5
|
| 59 |
FEEDBACK_FLUSH_INTERVAL_S: int = 300 # 5 minutes
|
pyproject.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
[project]
|
| 2 |
name = "rag-bible"
|
| 3 |
-
version = "1.
|
| 4 |
description = "French Bible RAG system with FAISS + cross-encoder reranking"
|
| 5 |
requires-python = ">=3.12"
|
| 6 |
dependencies = [
|
|
|
|
| 1 |
[project]
|
| 2 |
name = "rag-bible"
|
| 3 |
+
version = "1.2.0"
|
| 4 |
description = "French Bible RAG system with FAISS + cross-encoder reranking"
|
| 5 |
requires-python = ">=3.12"
|
| 6 |
dependencies = [
|
rag/feedback.py
CHANGED
|
@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
|
|
| 11 |
_lock = threading.Lock()
|
| 12 |
_scheduler: threading.Timer | None = None
|
| 13 |
_count = 0
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def record_feedback(
|
|
@@ -23,6 +24,8 @@ def record_feedback(
|
|
| 23 |
buffer_path: Path,
|
| 24 |
flush_threshold: int,
|
| 25 |
hf_repo: str,
|
|
|
|
|
|
|
| 26 |
) -> None:
|
| 27 |
"""Append a feedback record to the local JSONL buffer.
|
| 28 |
|
|
@@ -46,9 +49,23 @@ def record_feedback(
|
|
| 46 |
Number of records that triggers an automatic flush.
|
| 47 |
hf_repo : str
|
| 48 |
HuggingFace Dataset repo ID for flushing.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
"""
|
| 50 |
global _count # noqa: PLW0603
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
record = {
|
| 53 |
"query": query,
|
| 54 |
"book_title": book_title,
|
|
@@ -56,6 +73,8 @@ def record_feedback(
|
|
| 56 |
"verse": verse,
|
| 57 |
"score": score,
|
| 58 |
"feedback": feedback,
|
|
|
|
|
|
|
| 59 |
"timestamp": datetime.now(UTC).isoformat(),
|
| 60 |
}
|
| 61 |
|
|
@@ -65,7 +84,7 @@ def record_feedback(
|
|
| 65 |
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
| 66 |
_count += 1
|
| 67 |
|
| 68 |
-
if _count >= flush_threshold:
|
| 69 |
_count = 0
|
| 70 |
threading.Thread(
|
| 71 |
target=_flush_to_hub,
|
|
|
|
| 11 |
_lock = threading.Lock()
|
| 12 |
_scheduler: threading.Timer | None = None
|
| 13 |
_count = 0
|
| 14 |
+
_seen: dict[tuple[str, ...], str] = {}
|
| 15 |
|
| 16 |
|
| 17 |
def record_feedback(
|
|
|
|
| 24 |
buffer_path: Path,
|
| 25 |
flush_threshold: int,
|
| 26 |
hf_repo: str,
|
| 27 |
+
source: str = "local",
|
| 28 |
+
session_id: str = "",
|
| 29 |
) -> None:
|
| 30 |
"""Append a feedback record to the local JSONL buffer.
|
| 31 |
|
|
|
|
| 49 |
Number of records that triggers an automatic flush.
|
| 50 |
hf_repo : str
|
| 51 |
HuggingFace Dataset repo ID for flushing.
|
| 52 |
+
source : str
|
| 53 |
+
Environment source tag ("production" or "local").
|
| 54 |
+
session_id : str
|
| 55 |
+
Browser session identifier (UUID from client).
|
| 56 |
"""
|
| 57 |
global _count # noqa: PLW0603
|
| 58 |
|
| 59 |
+
dedup_key = (session_id, query, book_title, chapter, verse)
|
| 60 |
+
|
| 61 |
+
with _lock:
|
| 62 |
+
if feedback in ("up", "down"):
|
| 63 |
+
if _seen.get(dedup_key) == feedback:
|
| 64 |
+
return
|
| 65 |
+
_seen[dedup_key] = feedback
|
| 66 |
+
elif feedback.startswith("cancel_"):
|
| 67 |
+
_seen.pop(dedup_key, None)
|
| 68 |
+
|
| 69 |
record = {
|
| 70 |
"query": query,
|
| 71 |
"book_title": book_title,
|
|
|
|
| 73 |
"verse": verse,
|
| 74 |
"score": score,
|
| 75 |
"feedback": feedback,
|
| 76 |
+
"source": source,
|
| 77 |
+
"session_id": session_id,
|
| 78 |
"timestamp": datetime.now(UTC).isoformat(),
|
| 79 |
}
|
| 80 |
|
|
|
|
| 84 |
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
| 85 |
_count += 1
|
| 86 |
|
| 87 |
+
if _count >= flush_threshold and source == "production":
|
| 88 |
_count = 0
|
| 89 |
threading.Thread(
|
| 90 |
target=_flush_to_hub,
|
static/app.js
CHANGED
|
@@ -435,6 +435,48 @@ document.addEventListener("DOMContentLoaded", function () {
|
|
| 435 |
var container = document.getElementById("results-container");
|
| 436 |
if (!container) return;
|
| 437 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
container.addEventListener("click", function (evt) {
|
| 439 |
var btn = evt.target.closest(".feedback-btn");
|
| 440 |
if (!btn) return;
|
|
@@ -443,9 +485,32 @@ document.addEventListener("DOMContentLoaded", function () {
|
|
| 443 |
if (!card) return;
|
| 444 |
|
| 445 |
var isActive = btn.getAttribute("aria-pressed") === "true";
|
|
|
|
| 446 |
|
| 447 |
if (isActive) {
|
| 448 |
btn.setAttribute("aria-pressed", "false");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
return;
|
| 450 |
}
|
| 451 |
|
|
@@ -457,6 +522,7 @@ document.addEventListener("DOMContentLoaded", function () {
|
|
| 457 |
}
|
| 458 |
|
| 459 |
btn.setAttribute("aria-pressed", "true");
|
|
|
|
| 460 |
|
| 461 |
var params = new URLSearchParams();
|
| 462 |
params.set("query", card.dataset.query || "");
|
|
@@ -465,6 +531,7 @@ document.addEventListener("DOMContentLoaded", function () {
|
|
| 465 |
params.set("verse", card.dataset.verse || "");
|
| 466 |
params.set("score", card.dataset.score || "0");
|
| 467 |
params.set("feedback", btn.dataset.feedback);
|
|
|
|
| 468 |
|
| 469 |
fetch("/feedback", {
|
| 470 |
method: "POST",
|
|
|
|
| 435 |
var container = document.getElementById("results-container");
|
| 436 |
if (!container) return;
|
| 437 |
|
| 438 |
+
var sessionId = sessionStorage.getItem("feedback_session_id");
|
| 439 |
+
if (!sessionId) {
|
| 440 |
+
sessionId = crypto.randomUUID();
|
| 441 |
+
sessionStorage.setItem("feedback_session_id", sessionId);
|
| 442 |
+
}
|
| 443 |
+
|
| 444 |
+
function feedbackKey(card) {
|
| 445 |
+
return (
|
| 446 |
+
"fb:" +
|
| 447 |
+
(card.dataset.query || "") +
|
| 448 |
+
"|" +
|
| 449 |
+
(card.dataset.bookTitle || "") +
|
| 450 |
+
"|" +
|
| 451 |
+
(card.dataset.chapter || "") +
|
| 452 |
+
"|" +
|
| 453 |
+
(card.dataset.verse || "")
|
| 454 |
+
);
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
function restoreFeedbackState() {
|
| 458 |
+
var cards = container.querySelectorAll(".result-card");
|
| 459 |
+
cards.forEach(function (card) {
|
| 460 |
+
var stored = sessionStorage.getItem(feedbackKey(card));
|
| 461 |
+
if (!stored) return;
|
| 462 |
+
var btn = card.querySelector(
|
| 463 |
+
'.feedback-btn[data-feedback="' + stored + '"]'
|
| 464 |
+
);
|
| 465 |
+
if (btn) btn.setAttribute("aria-pressed", "true");
|
| 466 |
+
});
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
restoreFeedbackState();
|
| 470 |
+
|
| 471 |
+
document.body.addEventListener("htmx:afterSwap", function (e) {
|
| 472 |
+
if (
|
| 473 |
+
e.detail.target &&
|
| 474 |
+
e.detail.target.id === "results-container"
|
| 475 |
+
) {
|
| 476 |
+
restoreFeedbackState();
|
| 477 |
+
}
|
| 478 |
+
});
|
| 479 |
+
|
| 480 |
container.addEventListener("click", function (evt) {
|
| 481 |
var btn = evt.target.closest(".feedback-btn");
|
| 482 |
if (!btn) return;
|
|
|
|
| 485 |
if (!card) return;
|
| 486 |
|
| 487 |
var isActive = btn.getAttribute("aria-pressed") === "true";
|
| 488 |
+
var key = feedbackKey(card);
|
| 489 |
|
| 490 |
if (isActive) {
|
| 491 |
btn.setAttribute("aria-pressed", "false");
|
| 492 |
+
sessionStorage.removeItem(key);
|
| 493 |
+
|
| 494 |
+
var cancelParams = new URLSearchParams();
|
| 495 |
+
cancelParams.set("query", card.dataset.query || "");
|
| 496 |
+
cancelParams.set("book_title", card.dataset.bookTitle || "");
|
| 497 |
+
cancelParams.set("chapter", card.dataset.chapter || "");
|
| 498 |
+
cancelParams.set("verse", card.dataset.verse || "");
|
| 499 |
+
cancelParams.set("score", card.dataset.score || "0");
|
| 500 |
+
cancelParams.set(
|
| 501 |
+
"feedback",
|
| 502 |
+
"cancel_" + btn.dataset.feedback
|
| 503 |
+
);
|
| 504 |
+
cancelParams.set("session_id", sessionId);
|
| 505 |
+
|
| 506 |
+
fetch("/feedback", {
|
| 507 |
+
method: "POST",
|
| 508 |
+
headers: {
|
| 509 |
+
"Content-Type": "application/x-www-form-urlencoded"
|
| 510 |
+
},
|
| 511 |
+
body: cancelParams.toString()
|
| 512 |
+
}).catch(function () {});
|
| 513 |
+
|
| 514 |
return;
|
| 515 |
}
|
| 516 |
|
|
|
|
| 522 |
}
|
| 523 |
|
| 524 |
btn.setAttribute("aria-pressed", "true");
|
| 525 |
+
sessionStorage.setItem(key, btn.dataset.feedback);
|
| 526 |
|
| 527 |
var params = new URLSearchParams();
|
| 528 |
params.set("query", card.dataset.query || "");
|
|
|
|
| 531 |
params.set("verse", card.dataset.verse || "");
|
| 532 |
params.set("score", card.dataset.score || "0");
|
| 533 |
params.set("feedback", btn.dataset.feedback);
|
| 534 |
+
params.set("session_id", sessionId);
|
| 535 |
|
| 536 |
fetch("/feedback", {
|
| 537 |
method: "POST",
|
static/index.html
CHANGED
|
@@ -65,7 +65,7 @@
|
|
| 65 |
<h2 class="sidebar-title">Historique</h2>
|
| 66 |
<div class="history-list"></div>
|
| 67 |
<p class="history-empty">Vos recherches apparaîtront ici</p>
|
| 68 |
-
<p class="sidebar-version">v1.
|
| 69 |
</aside>
|
| 70 |
|
| 71 |
<div class="hero-section">
|
|
|
|
| 65 |
<h2 class="sidebar-title">Historique</h2>
|
| 66 |
<div class="history-list"></div>
|
| 67 |
<p class="history-empty">Vos recherches apparaîtront ici</p>
|
| 68 |
+
<p class="sidebar-version">v1.2.0</p>
|
| 69 |
</aside>
|
| 70 |
|
| 71 |
<div class="hero-section">
|
templates/loading.html
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
<div class="status-message loading-message" role="status" aria-live="polite"
|
| 2 |
hx-post="/search" hx-trigger="load delay:2s"
|
| 3 |
-
hx-target="#results" hx-swap="innerHTML"
|
| 4 |
hx-vals='{"query": "{{ query }}"}'>
|
| 5 |
<p>Chargement des modèles en cours...</p>
|
| 6 |
<div class="loading-spinner" aria-hidden="true"></div>
|
|
|
|
| 1 |
<div class="status-message loading-message" role="status" aria-live="polite"
|
| 2 |
hx-post="/search" hx-trigger="load delay:2s"
|
| 3 |
+
hx-target="#results-container" hx-swap="innerHTML"
|
| 4 |
hx-vals='{"query": "{{ query }}"}'>
|
| 5 |
<p>Chargement des modèles en cours...</p>
|
| 6 |
<div class="loading-spinner" aria-hidden="true"></div>
|
uv.lock
CHANGED
|
@@ -1168,7 +1168,7 @@ wheels = [
|
|
| 1168 |
|
| 1169 |
[[package]]
|
| 1170 |
name = "rag-bible"
|
| 1171 |
-
version = "1.1.
|
| 1172 |
source = { virtual = "." }
|
| 1173 |
dependencies = [
|
| 1174 |
{ name = "faiss-cpu" },
|
|
|
|
| 1168 |
|
| 1169 |
[[package]]
|
| 1170 |
name = "rag-bible"
|
| 1171 |
+
version = "1.1.2"
|
| 1172 |
source = { virtual = "." }
|
| 1173 |
dependencies = [
|
| 1174 |
{ name = "faiss-cpu" },
|