Spaces:
Sleeping
Sleeping
github-actions[bot] committed on
Commit ·
5be0833
1
Parent(s): d72b225
🚀 Auto-deploy backend from GitHub (7245822)
Browse files - rag/curriculum_rag.py +24 -1
rag/curriculum_rag.py
CHANGED
|
@@ -148,7 +148,12 @@ def retrieve_lesson_pdf_context(
|
|
| 148 |
storage_path: str | None = None,
|
| 149 |
top_k: int = 8,
|
| 150 |
) -> Tuple[list[dict], str]:
|
| 151 |
-
"""Retrieve chunks by storage_path exact match + semantic ranking; fallback to general query.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
if storage_path:
|
| 153 |
exact_chunks = retrieve_curriculum_context(
|
| 154 |
query=topic,
|
|
@@ -160,12 +165,30 @@ def retrieve_lesson_pdf_context(
|
|
| 160 |
if exact_chunks and any(c["score"] >= 0.65 for c in exact_chunks):
|
| 161 |
return exact_chunks, "exact"
|
| 162 |
|
|
|
|
| 163 |
general_chunks = retrieve_curriculum_context(
|
| 164 |
query=topic,
|
| 165 |
subject=subject,
|
| 166 |
quarter=quarter,
|
| 167 |
top_k=top_k,
|
| 168 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
if storage_path and exact_chunks:
|
| 171 |
all_chunks = exact_chunks + general_chunks
|
|
|
|
| 148 |
storage_path: str | None = None,
|
| 149 |
top_k: int = 8,
|
| 150 |
) -> Tuple[list[dict], str]:
|
| 151 |
+
"""Retrieve chunks by storage_path exact match + semantic ranking; fallback to general query.
|
| 152 |
+
|
| 153 |
+
NOTE: Curriculum PDF chunks are often tagged with quarter=1 even when they cover all quarters.
|
| 154 |
+
We first try the exact quarter, then fallback to quarter=1, then no quarter filter.
|
| 155 |
+
"""
|
| 156 |
+
# Try 1: Exact match with storage_path + quarter
|
| 157 |
if storage_path:
|
| 158 |
exact_chunks = retrieve_curriculum_context(
|
| 159 |
query=topic,
|
|
|
|
| 165 |
if exact_chunks and any(c["score"] >= 0.65 for c in exact_chunks):
|
| 166 |
return exact_chunks, "exact"
|
| 167 |
|
| 168 |
+
# Try 2: General query with exact quarter
|
| 169 |
general_chunks = retrieve_curriculum_context(
|
| 170 |
query=topic,
|
| 171 |
subject=subject,
|
| 172 |
quarter=quarter,
|
| 173 |
top_k=top_k,
|
| 174 |
)
|
| 175 |
+
|
| 176 |
+
# Try 3: Fallback to quarter=1 (most curriculum PDFs are tagged Q1)
|
| 177 |
+
if not general_chunks and quarter != 1:
|
| 178 |
+
general_chunks = retrieve_curriculum_context(
|
| 179 |
+
query=topic,
|
| 180 |
+
subject=subject,
|
| 181 |
+
quarter=1,
|
| 182 |
+
top_k=top_k,
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
# Try 4: Final fallback - no quarter filter at all
|
| 186 |
+
if not general_chunks:
|
| 187 |
+
general_chunks = retrieve_curriculum_context(
|
| 188 |
+
query=topic,
|
| 189 |
+
subject=subject,
|
| 190 |
+
top_k=top_k,
|
| 191 |
+
)
|
| 192 |
|
| 193 |
if storage_path and exact_chunks:
|
| 194 |
all_chunks = exact_chunks + general_chunks
|