Commit ·
16fa4e7
1
Parent(s): c3f548e
Init project
Browse files- .gitignore +4 -0
- app.py +372 -0
- requirements.txt +19 -0
- src/__init__.py +2 -0
- src/config.py +61 -0
- src/embeddings.py +32 -0
- src/export.py +116 -0
- src/filters.py +86 -0
- src/indexing.py +173 -0
- src/learning.py +227 -0
- src/llm.py +42 -0
- src/prompts/answer.jinja2 +22 -0
- src/prompts/flashcards.jinja2 +35 -0
- src/prompts/quiz.jinja2 +37 -0
- src/prompts/summary_map.jinja2 +24 -0
- src/prompts/summary_reduce.jinja2 +29 -0
- src/prompts/summary_single.jinja2 +25 -0
- src/rag.py +119 -0
- src/schemas.py +105 -0
- src/store.py +152 -0
- static/aivn_logo.png +0 -0
- static/style.css +567 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
storage/
|
| 3 |
+
data/
|
| 4 |
+
exports/
|
app.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import base64
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
|
| 10 |
+
from src.config import settings
|
| 11 |
+
from src.export import export
|
| 12 |
+
from src.filters import MetadataFilter, filters_to_dict
|
| 13 |
+
from src.indexing import save_and_ingest_pdf
|
| 14 |
+
from src.learning import generate_flashcards, generate_quiz, summarize
|
| 15 |
+
from src.llm import set_runtime_gemini_api_key
|
| 16 |
+
from src.rag import answer
|
| 17 |
+
from src.store import list_documents
|
| 18 |
+
|
| 19 |
+
# Static HTML for the usage-notice card passed to ``gr.HTML`` below the page
# header. The user-facing text is Vietnamese and is emitted verbatim.
_INFO_NOTE_HTML = """
<div class="info-card">
<div class="info-card-title">⚠️ Lưu ý sử dụng</div>
<ul class="info-card-list">
<li>Đây là ứng dụng demo phục vụ mục đích học tập và minh hoạ cho bài toán RAG.</li>
<li>Ứng dụng dùng <b>Gemini API</b>. Hãy nhập <b>Gemini API Key</b> trước khi chạy (key chỉ dùng trong phiên hiện tại, không lưu).</li>
<li>Hãy tải PDF trước, đợi hệ thống index xong, rồi mới dùng các tab Hỏi đáp, Tóm tắt, Quiz và Flashcards.</li>
</ul>
</div>
"""

# Step-by-step usage guide (Markdown) rendered inside the help accordion.
_USAGE_MARKDOWN = """
1. **Tải PDF** ở khối bên trái rồi bấm **Nạp & Index**.
2. **Chọn tài liệu** muốn học trong danh sách đã index.
3. Dùng các tab để **hỏi đáp**, **tóm tắt**, **tạo quiz** hoặc **flashcards**.
4. Nếu chỉ chọn đúng 1 tài liệu, bạn có thể lọc thêm theo **trang**.

**Mẹo:** Khi đặt câu hỏi rõ ràng theo chủ đề, kết quả RAG thường sát và dễ học hơn.
"""

# Stylesheet text, read once at import time. The path is relative, so it is
# resolved against the process working directory.
_CSS = Path("static/style.css").read_text(encoding="utf-8")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _img_b64(path: str) -> str:
|
| 43 |
+
with open(path, "rb") as file_obj:
|
| 44 |
+
return base64.b64encode(file_obj.read()).decode("utf-8")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _status_html(message: str) -> str:
|
| 48 |
+
return f'<div class="status-bar">{message}</div>'
|
| 49 |
+
|
| 50 |
+
def _read_uploaded_pdf(file_obj: object) -> tuple[bytes, str]:
|
| 51 |
+
"""Normalize Gradio file payload into (bytes, filename).
|
| 52 |
+
|
| 53 |
+
Gradio may pass:
|
| 54 |
+
- `str` / `NamedString`: a local filepath
|
| 55 |
+
- `FileData`: object with `.path` and optional `.orig_name`
|
| 56 |
+
- `dict`: with keys like `path` / `orig_name`
|
| 57 |
+
"""
|
| 58 |
+
if isinstance(file_obj, str):
|
| 59 |
+
p = Path(file_obj)
|
| 60 |
+
return p.read_bytes(), p.name
|
| 61 |
+
|
| 62 |
+
path = getattr(file_obj, "path", None)
|
| 63 |
+
orig = getattr(file_obj, "orig_name", None)
|
| 64 |
+
if isinstance(path, str) and path:
|
| 65 |
+
p = Path(path)
|
| 66 |
+
name = str(orig).strip() if isinstance(orig, str) and orig.strip() else p.name
|
| 67 |
+
return p.read_bytes(), name
|
| 68 |
+
|
| 69 |
+
if isinstance(file_obj, dict):
|
| 70 |
+
raw_path = file_obj.get("path")
|
| 71 |
+
raw_name = file_obj.get("orig_name") or file_obj.get("name")
|
| 72 |
+
if isinstance(raw_path, str) and raw_path:
|
| 73 |
+
p = Path(raw_path)
|
| 74 |
+
name = str(raw_name).strip() if isinstance(raw_name, str) and raw_name.strip() else p.name
|
| 75 |
+
return p.read_bytes(), name
|
| 76 |
+
|
| 77 |
+
raise TypeError(f"Unsupported uploaded file type: {type(file_obj).__name__}")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def _filters(filenames: list[str] | None, page: int | None) -> dict[str, object] | None:
|
| 81 |
+
payload: dict[str, object] = {}
|
| 82 |
+
if filenames:
|
| 83 |
+
payload["filenames"] = filenames
|
| 84 |
+
if page is not None:
|
| 85 |
+
payload["page"] = page
|
| 86 |
+
return filters_to_dict(MetadataFilter.model_validate(payload)) if payload else None
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _refresh_docs() -> tuple[object, dict[str, Any], object, str, str]:
    """Reload the indexed documents and build fresh values for the document widgets.

    Returns:
        A 5-tuple, in order: an update for the document checkbox group
        (new choices, nothing selected), a filename -> document record map,
        an update resetting the page dropdown to the "all pages" entry,
        a summary line in Markdown, and a Markdown bullet list of filenames.
    """
    all_pages = "(Tất cả trang)"
    records = list_documents()
    names = [record["filename"] for record in records]
    by_name = dict(zip(names, records))

    if not records:
        summary = "Chưa có tài liệu nào được index."
    else:
        total_chunks = sum(int(record["chunk_count"]) for record in records)
        summary = f"**{len(records)}** tài liệu đã index · **{total_chunks}** đoạn văn"

    if names:
        filenames_text = "\n".join(f"- `{name}`" for name in names)
    else:
        filenames_text = "_Danh sách trống_"

    docs_update = gr.update(choices=names, value=[])
    page_update = gr.update(choices=[all_pages], value=all_pages, interactive=True)
    return docs_update, by_name, page_update, summary, filenames_text
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _pages_for_selection(doc_map: dict[str, Any], selected: list[str]) -> gr.Dropdown:
    """Compute the page dropdown update for the current document selection.

    The page filter is only offered when exactly one document is selected;
    otherwise the dropdown is reset to the "all pages" entry and disabled.

    Args:
        doc_map: Filename -> document record map; a record's ``pages`` entry
            supplies the selectable page numbers.
        selected: Currently selected filenames.

    Returns:
        The ``gr.update(...)`` payload for the page dropdown.
    """
    all_pages = "(Tất cả trang)"
    if len(selected) == 1:
        record = doc_map.get(selected[0]) or {}
        options = [all_pages]
        options.extend(str(number) for number in (record.get("pages") or []))
        return gr.update(choices=options, value=all_pages, interactive=True)
    return gr.update(choices=[all_pages], value=all_pages, interactive=False)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _upload_pdf(
    file: object | None,
) -> tuple[str, object, dict[str, Any], object, str, str]:
    """Ingest an uploaded PDF and refresh the document widgets.

    Args:
        file: Payload from the upload component, or ``None`` when no file
            has been chosen.

    Returns:
        A 6-tuple: the status-bar HTML followed by the five values produced
        by ``_refresh_docs()`` (document choices, document map, page dropdown
        update, summary Markdown, filename list Markdown).
    """
    # Local import: ``html`` is not among this file's top-level imports.
    from html import escape

    if file is None:
        status = _status_html("⚠️ Vui lòng chọn file PDF.")
    else:
        file_bytes, filename = _read_uploaded_pdf(file)
        info = save_and_ingest_pdf(file_bytes, filename)
        # The status is shown by an HTML component, so Markdown emphasis would
        # appear as literal asterisks; use <b> instead. The filename comes from
        # the upload and must be escaped before being embedded in markup.
        shown_name = escape(str(info["filename"]))
        shown_count = escape(str(info["chunks_indexed"]))
        status = _status_html(f"✅ Đã nạp <b>{shown_name}</b> · {shown_count} đoạn")

    # Refresh after ingesting so the new document appears in the widgets.
    return (status, *_refresh_docs())
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _ask(question: str, k: int, selected_docs: list[str], page: str, gemini_key: str) -> tuple[str, str]:
|
| 140 |
+
if not question or not question.strip():
|
| 141 |
+
return "Vui lòng nhập câu hỏi.", ""
|
| 142 |
+
page_num = None if page == "(Tất cả trang)" else int(page)
|
| 143 |
+
set_runtime_gemini_api_key(gemini_key)
|
| 144 |
+
res = answer(question.strip(), k=int(k), filters=_filters(selected_docs, page_num))
|
| 145 |
+
return res.answer, json.dumps(res.model_dump(), ensure_ascii=False, indent=2)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _summarize(query: str, k: int, selected_docs: list[str], page: str, gemini_key: str) -> tuple[str, str]:
    """Summarize the selected documents, optionally guided by a topic.

    Args:
        query: Optional guiding topic; blank means no topic.
        k: Number of chunks to retrieve.
        selected_docs: Filenames to restrict retrieval to.
        page: Page dropdown value (page number as text, or "all pages").
        gemini_key: API key handed to ``set_runtime_gemini_api_key``.

    Returns:
        ``(markdown, json_debug)`` for the generated summary.
    """
    page_num = int(page) if page != "(Tất cả trang)" else None
    set_runtime_gemini_api_key(gemini_key)
    request = {
        "query": query.strip() or None,
        "filters": _filters(selected_docs, page_num),
        "k": int(k),
    }
    res = summarize(**request)
    markdown = export(res, fmt="md")
    return markdown, res.model_dump_json(indent=2)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def _quiz(
    query: str,
    count: int,
    k: int,
    selected_docs: list[str],
    page: str,
    gemini_key: str,
) -> tuple[str, str]:
    """Generate a quiz from the selected documents.

    Args:
        query: Optional topic; blank means no topic.
        count: Number of questions to request.
        k: Number of chunks to retrieve.
        selected_docs: Filenames to restrict retrieval to.
        page: Page dropdown value (page number as text, or "all pages").
        gemini_key: API key handed to ``set_runtime_gemini_api_key``.

    Returns:
        ``(markdown, json_debug)`` for the generated quiz.
    """
    page_num = int(page) if page != "(Tất cả trang)" else None
    set_runtime_gemini_api_key(gemini_key)
    request = {
        "query": query.strip() or None,
        "count": int(count),
        "filters": _filters(selected_docs, page_num),
        "k": int(k),
    }
    res = generate_quiz(**request)
    markdown = export(res, fmt="md")
    return markdown, res.model_dump_json(indent=2)
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def _flashcards(
    query: str,
    count: int,
    k: int,
    selected_docs: list[str],
    page: str,
    gemini_key: str,
) -> tuple[str, str]:
    """Generate flashcards from the selected documents.

    Args:
        query: Optional topic; blank means no topic.
        count: Number of cards to request.
        k: Number of chunks to retrieve.
        selected_docs: Filenames to restrict retrieval to.
        page: Page dropdown value (page number as text, or "all pages").
        gemini_key: API key handed to ``set_runtime_gemini_api_key``.

    Returns:
        ``(markdown, json_debug)`` for the generated flashcard set.
    """
    page_num = int(page) if page != "(Tất cả trang)" else None
    set_runtime_gemini_api_key(gemini_key)
    request = {
        "query": query.strip() or None,
        "count": int(count),
        "filters": _filters(selected_docs, page_num),
        "k": int(k),
    }
    res = generate_flashcards(**request)
    markdown = export(res, fmt="md")
    return markdown, res.model_dump_json(indent=2)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def _write_export(md_text: str, filename: str) -> str | None:
|
| 194 |
+
if not md_text or md_text.startswith("Lỗi:"):
|
| 195 |
+
return None
|
| 196 |
+
output_path = Path("exports") / filename
|
| 197 |
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 198 |
+
output_path.write_text(md_text, encoding="utf-8")
|
| 199 |
+
return str(output_path)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
# Theme built on ``gr.themes.Base`` with overridden background, block and
# input colours; it is passed to ``launch()`` in the ``__main__`` guard.
_theme = gr.themes.Base().set(
    background_fill_primary="#eef1fb",
    background_fill_secondary="#e4e9f7",
    block_background_fill="transparent",
    block_border_color="transparent",
    block_border_width="0px",
    input_background_fill="#ffffff",
)
|
| 210 |
+
|
| 211 |
+
# NOTE(review): SOURCE for this block has lost its indentation; the nesting of
# the layout containers below is reconstructed from statement order and should
# be confirmed against the original file.
with gr.Blocks(title="RAG Learning System", fill_width=True, fill_height=True) as demo:
    # Header: logo embedded as a Base64 data URI, plus title and subtitle.
    with gr.Row(elem_classes="header-row"):
        logo_b64 = _img_b64("static/aivn_logo.png")
        gr.HTML(f'<img src="data:image/png;base64,{logo_b64}" alt="AIVN">')
        gr.HTML(
            '<div class="header-meta">'
            '<p class="header-title">📚 RAG Learning System</p>'
            '<p class="header-sub">AIO2025 — Hỏi đáp · Tóm tắt · Quiz · Flashcards có trích dẫn nguồn</p>'
            '</div>'
        )

    gr.HTML(_INFO_NOTE_HTML)

    # Filename -> document record map; written by the refresh/upload handlers
    # and read by the page-dropdown handler.
    doc_map_state = gr.State({})

    with gr.Row(equal_height=False, elem_classes="main-layout"):
        # Control column: upload, API key, help and configuration panels.
        with gr.Column(scale=4, min_width=340, elem_classes="control-stack"):
            gr.Markdown("### 📥 Nạp tài liệu PDF")
            upload = gr.File(
                label="Chọn PDF",
                file_types=[".pdf"],
                file_count="single",
                type="filepath",
            )
            upload_btn = gr.Button("Nạp & Index", elem_classes="gen-btn")
            upload_status = gr.HTML(_status_html("Sẵn sàng."))

            # with gr.Group(elem_classes="control-card"):
            with gr.Accordion("🔑 Gemini API Key (tuỳ chọn)", open=False):
                gr.Markdown(
                    "API key chỉ dùng trong phiên hiện tại và **không được lưu**.",
                    elem_classes="help-markdown",
                )
                gemini_key_input = gr.Textbox(
                    label="Gemini API Key",
                    type="password",
                    placeholder="AIza...",
                    lines=1,
                    max_lines=1,
                )

            with gr.Accordion("❓ Hướng dẫn sử dụng", open=False):
                gr.Markdown(_USAGE_MARKDOWN, elem_classes="help-markdown")

            # Read-only view of the effective settings values.
            with gr.Accordion("🔐 Cấu hình chạy trên Space", open=False):
                gr.Markdown(
                    f"""
- LLM model: `{settings.llm_model}`
- Embedding model: `{settings.embedding_model}`
- Collection: `{settings.qdrant_collection}`
- Data dir: `{settings.data_dir}`
- Storage dir: `{settings.storage_dir}`
""",
                    elem_classes="help-markdown",
                )

        # Main column: indexed-document selection and the four task tabs.
        with gr.Column(scale=7, min_width=560, elem_classes="preview-col"):
            gr.HTML(
                '<div class="preview-header"><div>'
                '<p class="preview-title">🗂️ Tài liệu đã index</p>'
                '<p class="preview-sub">Làm mới danh sách sau khi tải PDF, rồi chọn phạm vi học tập trước khi truy vấn.</p>'
                '</div></div>'
            )

            refresh_btn = gr.Button("Làm mới danh sách tài liệu")
            doc_summary = gr.Markdown("Chưa có tài liệu nào được index.", elem_classes="doc-summary")
            docs = gr.CheckboxGroup(label="Chọn tài liệu", choices=[], value=[])
            page = gr.Dropdown(
                label="Trang (chỉ áp dụng khi chọn đúng 1 tài liệu)",
                choices=["(Tất cả trang)"],
                value="(Tất cả trang)",
            )
            doc_list_md = gr.Markdown("_Danh sách trống_")

            with gr.Tabs():
                # Question answering tab.
                with gr.Tab("💬 Hỏi đáp"):
                    q = gr.Textbox(
                        label="Câu hỏi",
                        lines=2,
                        placeholder="Ví dụ: LoRA fine-tuning là gì và tài liệu giải thích điều này ra sao?",
                    )
                    k_ask = gr.Slider(1, 32, value=6, step=1, label="Top-k retrieval")
                    ask_btn = gr.Button("Trả lời", elem_classes="gen-btn")
                    ask_md = gr.Markdown(elem_classes="result-markdown")
                    with gr.Accordion("JSON debug", open=False):
                        ask_raw = gr.Code(label="", language="json")

                # Summary tab.
                with gr.Tab("📝 Tóm tắt"):
                    s_query = gr.Textbox(label="Chủ đề hướng dẫn (tuỳ chọn)", lines=1)
                    s_k = gr.Slider(1, 64, value=10, step=1, label="Số đoạn truy xuất (k)")
                    s_btn = gr.Button("Tạo tóm tắt", elem_classes="gen-btn")
                    s_md = gr.Markdown(elem_classes="result-markdown")
                    s_download = gr.File(label="Tải Markdown", interactive=False)
                    with gr.Accordion("JSON debug", open=False):
                        s_raw = gr.Code(label="", language="json")

                # Quiz tab.
                with gr.Tab("📋 Quiz"):
                    z_query = gr.Textbox(label="Chủ đề (tuỳ chọn)", lines=1)
                    z_count = gr.Slider(1, 30, value=3, step=1, label="Số câu hỏi")
                    z_k = gr.Slider(1, 64, value=10, step=1, label="Số đoạn truy xuất (k)")
                    z_btn = gr.Button("Tạo quiz", elem_classes="gen-btn")
                    z_md = gr.Markdown(elem_classes="result-markdown")
                    z_download = gr.File(label="Tải Markdown", interactive=False)
                    with gr.Accordion("JSON debug", open=False):
                        z_raw = gr.Code(label="", language="json")

                # Flashcards tab.
                with gr.Tab("🃏 Flashcards"):
                    f_query = gr.Textbox(label="Chủ đề (tuỳ chọn)", lines=1)
                    f_count = gr.Slider(1, 40, value=15, step=1, label="Số thẻ")
                    f_k = gr.Slider(1, 64, value=16, step=1, label="Số đoạn truy xuất (k)")
                    f_btn = gr.Button("Tạo flashcards", elem_classes="gen-btn")
                    f_md = gr.Markdown(elem_classes="result-markdown")
                    f_download = gr.File(label="Tải Markdown", interactive=False)
                    with gr.Accordion("JSON debug", open=False):
                        f_raw = gr.Code(label="", language="json")

    gr.HTML(
        '<div class="footer-text">'
        'Created by <a href="https://vlai.aivietnam.edu.vn/" target="_blank">VLAI</a>'
        ' • <a href="https://aivietnam.edu.vn/" target="_blank">AI VIETNAM</a>'
        '</div>'
    )

    # --- Event wiring ---
    # Refresh and upload both repopulate the document widgets; upload also
    # updates the status bar.
    refresh_btn.click(
        fn=_refresh_docs,
        inputs=[],
        outputs=[docs, doc_map_state, page, doc_summary, doc_list_md],
    )
    # Changing the document selection recomputes the page dropdown.
    docs.change(fn=_pages_for_selection, inputs=[doc_map_state, docs], outputs=[page])
    upload_btn.click(
        fn=_upload_pdf,
        inputs=[upload],
        outputs=[upload_status, docs, doc_map_state, page, doc_summary, doc_list_md],
    )

    ask_btn.click(fn=_ask, inputs=[q, k_ask, docs, page, gemini_key_input], outputs=[ask_md, ask_raw])
    # Each generator is chained with a step that writes the rendered Markdown
    # to a fixed filename and feeds that path to the tab's download widget.
    s_btn.click(fn=_summarize, inputs=[s_query, s_k, docs, page, gemini_key_input], outputs=[s_md, s_raw]).then(
        fn=lambda text: _write_export(text, "summary.md"),
        inputs=[s_md],
        outputs=[s_download],
    )
    z_btn.click(fn=_quiz, inputs=[z_query, z_count, z_k, docs, page, gemini_key_input], outputs=[z_md, z_raw]).then(
        fn=lambda text: _write_export(text, "quiz.md"),
        inputs=[z_md],
        outputs=[z_download],
    )
    f_btn.click(
        fn=_flashcards,
        inputs=[f_query, f_count, f_k, docs, page, gemini_key_input],
        outputs=[f_md, f_raw],
    ).then(
        fn=lambda text: _write_export(text, "flashcards.md"),
        inputs=[f_md],
        outputs=[f_download],
    )
|
| 366 |
+
|
| 367 |
+
if __name__ == "__main__":
    # Script entry point: enable the request queue with a default concurrency
    # limit of 2, then launch the app with the module-level stylesheet and
    # theme. The logo path is explicitly listed in ``allowed_paths``.
    demo.queue(default_concurrency_limit=2).launch(
        allowed_paths=["static/aivn_logo.png"],
        css=_CSS, theme=_theme
    )
|
| 372 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio[mcp,oauth]==6.13.0
|
| 2 |
+
|
| 3 |
+
jinja2>=3.1,<4
|
| 4 |
+
loguru>=0.7.3,<0.8
|
| 5 |
+
|
| 6 |
+
pydantic>=2.11.10,<=2.12.5
|
| 7 |
+
pydantic-settings>=2.13,<3
|
| 8 |
+
python-dotenv>=1.2,<2
|
| 9 |
+
|
| 10 |
+
pypdf>=5,<6
|
| 11 |
+
qdrant-client>=1.16,<2
|
| 12 |
+
|
| 13 |
+
langchain-core>=1.3,<2
|
| 14 |
+
langchain-community>=0.4,<0.5
|
| 15 |
+
langchain-text-splitters>=1.1,<2
|
| 16 |
+
langchain-qdrant>=1.1,<2
|
| 17 |
+
|
| 18 |
+
google-genai>=1.0.0
|
| 19 |
+
sentence-transformers>=5.0.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""RAG Learning System core modules for Hugging Face Spaces."""
|
| 2 |
+
|
src/config.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Application configuration for Hugging Face Spaces.
|
| 2 |
+
|
| 3 |
+
Defaults live in code; a small set of values can be overridden via env vars.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
from functools import lru_cache
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Literal
|
| 11 |
+
|
| 12 |
+
from pydantic import Field, model_validator
|
| 13 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class Settings(BaseSettings):
    """Application settings with in-code defaults.

    Configured to read a ``.env`` file and environment variables using the
    ``RAG_`` prefix; unknown keys are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_prefix="RAG_",
        extra="ignore",
    )

    # Filesystem locations and the Qdrant collection name.
    data_dir: Path = Path("data")
    storage_dir: Path = Path("storage/qdrant")
    qdrant_collection: str = "rag_chunks"

    # Chunking and default retrieval depth.
    chunk_size: int = Field(default=1000, ge=100)
    chunk_overlap: int = Field(default=150, ge=0)
    top_k: int = Field(default=5, ge=1, le=64)

    # LLM selection; "gemini" is the only accepted provider value.
    llm_provider: Literal["gemini"] = "gemini"
    llm_model: str = "gemini-flash-lite-latest"
    llm_temperature: float = Field(default=0.1, ge=0.0, le=2.0)
    llm_max_new_tokens: int = Field(default=10000, ge=1, le=20000)

    # Embedding selection; "local" is the only accepted provider value.
    embedding_provider: Literal["local"] = "local"
    embedding_model: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

    # Populated through the explicit alias ``GEMINI_API_KEY``.
    gemini_api_key: str | None = Field(default=None, validation_alias="GEMINI_API_KEY")

    # Defaults for the learning features (summary, quiz, flashcards).
    summarize_batch_size: int = Field(default=10, ge=1)
    summarize_retrieval_k: int = Field(default=12, ge=1, le=128)
    generation_retrieval_k: int = Field(default=16, ge=1, le=128)
    quiz_default_count: int = Field(default=8, ge=1, le=50)
    flashcards_default_count: int = Field(default=15, ge=1, le=100)

    @model_validator(mode="after")
    def validate_config(self) -> "Settings":
        """Reject configurations where the chunk overlap is not below the chunk size.

        Raises:
            ValueError: If ``chunk_overlap >= chunk_size``.
        """
        if self.chunk_overlap >= self.chunk_size:
            raise ValueError("chunk_overlap must be smaller than chunk_size.")

        return self
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the ``Settings`` instance, constructing it on first call.

    The result is memoized by ``lru_cache``, so later calls return the same
    object.
    """
    return Settings()


# Module-level instance created at import time.
settings = get_settings()
|
| 61 |
+
|
src/embeddings.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Local embeddings via Sentence-Transformers (no inference provider)."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from functools import lru_cache
|
| 6 |
+
|
| 7 |
+
from langchain_core.embeddings import Embeddings
|
| 8 |
+
from sentence_transformers import SentenceTransformer
|
| 9 |
+
|
| 10 |
+
from src.config import settings
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@lru_cache(maxsize=1)
def _model() -> SentenceTransformer:
    """Return the ``SentenceTransformer`` for ``settings.embedding_model``.

    Memoized by ``lru_cache``, so the model is constructed once and then
    reused by every caller.
    """
    model_name = settings.embedding_model
    return SentenceTransformer(model_name)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class LocalSentenceTransformerEmbeddings(Embeddings):
    """``Embeddings`` implementation that encodes text with the cached local model.

    Both methods request normalized embeddings from the encoder and convert
    the result to plain Python lists.
    """

    def embed_documents(self, texts: list[str]) -> list[list[float]]:  # type: ignore[override]
        """Encode ``texts`` and return one vector per input text."""
        matrix = _model().encode(texts, normalize_embeddings=True)
        to_list = getattr(matrix, "tolist", None)
        if callable(to_list):
            return to_list()
        # Fallback for encoder outputs that do not expose ``tolist``.
        return [list(map(float, row)) for row in matrix]

    def embed_query(self, text: str) -> list[float]:  # type: ignore[override]
        """Encode a single query string and return its vector."""
        encoded = _model().encode([text], normalize_embeddings=True)
        vector = encoded[0]
        to_list = getattr(vector, "tolist", None)
        if callable(to_list):
            return to_list()
        return [float(component) for component in vector]
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def get_embeddings() -> Embeddings:
    """Return a new ``LocalSentenceTransformerEmbeddings`` instance."""
    return LocalSentenceTransformerEmbeddings()
|
| 32 |
+
|
src/export.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Export learning outputs to JSON or Markdown."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Literal
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel
|
| 9 |
+
|
| 10 |
+
from src.schemas import Citation, FlashcardSet, QuizSet, Summary
|
| 11 |
+
|
| 12 |
+
ExportFormat = Literal["text", "md", "json"]
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _citation_line(c: Citation) -> str:
|
| 16 |
+
parts = [f"[{c.source_marker}] {c.filename} p.{c.page}"]
|
| 17 |
+
if c.section:
|
| 18 |
+
parts.append(f"section: {c.section}")
|
| 19 |
+
if c.chunk_id:
|
| 20 |
+
parts.append(f"chunk: {c.chunk_id}")
|
| 21 |
+
return " | ".join(parts)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _citations_block(citations: list[Citation]) -> str:
|
| 25 |
+
if not citations:
|
| 26 |
+
return ""
|
| 27 |
+
lines = ["## Sources", ""]
|
| 28 |
+
lines.extend(f"- {_citation_line(c)}" for c in citations)
|
| 29 |
+
return "\n".join(lines) + "\n"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _to_markdown(model: BaseModel) -> str:
    """Render a ``Summary``, ``QuizSet`` or ``FlashcardSet`` as Markdown.

    Args:
        model: The learning output to render.

    Returns:
        The Markdown document, ending with exactly one newline.

    Raises:
        TypeError: If ``model`` is none of the supported types.
    """

    def _finish(body: list[str], citations: list[Citation]) -> str:
        # Append the sources section (when present) and normalize the
        # trailing whitespace to a single newline.
        sources = _citations_block(citations)
        if sources:
            body.append(sources)
        return "\n".join(body).rstrip() + "\n"

    if isinstance(model, Summary):
        heading = "# Summary" + (f": {model.target}" if model.target else "")
        out: list[str] = [heading, "", f"_Scope: {model.scope}_", ""]
        if model.summary:
            out += [model.summary.strip(), ""]
        if model.key_points:
            out += ["## Key Points", ""]
            out += [f"- {point}" for point in model.key_points]
            out.append("")
        return _finish(out, model.citations)

    if isinstance(model, QuizSet):
        heading = "# Quiz" + (f": {model.target}" if model.target else "")
        out = [heading, "", f"_Scope: {model.scope} | Items: {len(model.items)}_", ""]
        for number, entry in enumerate(model.items, start=1):
            tags: list[str] = []
            if entry.topic:
                tags.append(f"topic: {entry.topic}")
            if entry.difficulty:
                tags.append(f"difficulty: {entry.difficulty}")
            joined_tags = " | ".join(tags)
            suffix = f" _({joined_tags})_" if tags else ""

            out += [f"## Q{number}.{suffix}", "", entry.question.strip(), ""]
            # Options are lettered A, B, C, ... in order.
            out += [
                f"- {chr(ord('A') + position)}) {choice}"
                for position, choice in enumerate(entry.options)
            ]
            out.append("")
            out.append(f"**Answer:** {chr(ord('A') + entry.correct_index)}")
            if entry.explanation:
                out.append(f"**Explanation:** {entry.explanation.strip()}")
            if entry.source_markers:
                out.append(f"**Sources:** {', '.join(entry.source_markers)}")
            out.append("")
        return _finish(out, model.citations)

    if isinstance(model, FlashcardSet):
        heading = "# Flashcards" + (f": {model.target}" if model.target else "")
        out = [heading, "", f"_Scope: {model.scope} | Cards: {len(model.cards)}_", ""]
        for number, card in enumerate(model.cards, start=1):
            topic_suffix = f" — {card.topic}" if card.topic else ""
            out += [f"## Card {number}{topic_suffix}", ""]
            out.append(f"**Front:** {card.front.strip()}")
            out.append(f"**Back:** {card.back.strip()}")
            if card.hint:
                out.append(f"**Hint:** {card.hint.strip()}")
            if card.source_markers:
                out.append(f"**Sources:** {', '.join(card.source_markers)}")
            out.append("")
        return _finish(out, model.citations)

    raise TypeError(f"Unsupported model type: {type(model).__name__}")
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def export(
    model: BaseModel, *, fmt: ExportFormat = "text", output: Path | None = None
) -> str | Path:
    """Render ``model`` and either return the text or write it to ``output``.

    Args:
        model: The object to render.
        fmt: ``"json"`` for indented JSON; ``"text"`` or ``"md"`` for Markdown.
        output: Optional destination file; parent directories are created.

    Returns:
        The rendered string when ``output`` is ``None``; otherwise ``output``.

    Raises:
        TypeError: For a model type the Markdown renderer does not support.
        ValueError: For an unknown ``fmt``.
    """
    if fmt not in {"json", "text", "md"}:
        raise ValueError(f"Unknown fmt '{fmt}'. Expected 'text' | 'md' | 'json'.")

    if fmt == "json":
        rendered = model.model_dump_json(indent=2) + "\n"
    else:
        rendered = _to_markdown(model)

    if output is not None:
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_text(rendered, encoding="utf-8")
        return output
    return rendered
|
| 116 |
+
|
src/filters.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared metadata filtering utilities across the app."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from pydantic import BaseModel, model_validator
|
| 6 |
+
from qdrant_client.http import models as qmodels
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class MetadataFilter(BaseModel):
    """Filter applied against indexed chunk metadata.

    After validation the filter holds either a single ``filename`` or a list
    of several ``filenames``, never both, and blank strings become ``None``.
    """

    filename: str | None = None
    filenames: list[str] | None = None
    page: int | None = None
    section: str | None = None
    document_id: str | None = None

    @model_validator(mode="after")
    def _normalize(self) -> "MetadataFilter":
        """Collapse ``filenames`` and strip the string fields."""
        cleaned = [
            raw.strip()
            for raw in (self.filenames or [])
            if isinstance(raw, str) and raw.strip()
        ]
        if len(cleaned) > 1:
            # Multi-doc selection: page filter becomes ambiguous, so drop it.
            self.filename, self.filenames, self.page = None, cleaned, None
        elif cleaned:
            # A single name is promoted to the scalar ``filename`` field.
            self.filename, self.filenames = cleaned[0], None
        else:
            self.filenames = None

        for attr in ("filename", "section", "document_id"):
            current = getattr(self, attr)
            if current is not None:
                setattr(self, attr, current.strip() or None)
        return self
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def coerce_filter(filters: MetadataFilter | dict[str, object] | None) -> MetadataFilter | None:
    """Turn a dict (or ``None``) into a normalized `MetadataFilter`.

    Args:
        filters: An existing filter, a plain dict of filter fields, or ``None``.

    Returns:
        ``None`` or the given `MetadataFilter` unchanged; a dict is validated
        into a new `MetadataFilter`.

    Raises:
        TypeError: If ``filters`` is none of the supported types.
    """
    if filters is None or isinstance(filters, MetadataFilter):
        return filters
    if not isinstance(filters, dict):
        raise TypeError(f"Unsupported filters type: {type(filters).__name__}")
    return MetadataFilter.model_validate(filters)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def filters_to_dict(filters: MetadataFilter | dict[str, object] | None) -> dict[str, object] | None:
    """Return the normalized filter as a flat dict, or ``None`` if nothing is set.

    Fields whose value is ``None`` are omitted; an all-empty filter yields ``None``.
    """
    normalized = coerce_filter(filters)
    if normalized is None:
        return None
    dumped = normalized.model_dump(exclude_none=True)
    return dumped if dumped else None
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def filters_to_qdrant(filters: MetadataFilter | dict[str, object] | None) -> qmodels.Filter | None:
    """Translate normalized metadata filters into a Qdrant ``Filter``.

    Each set field becomes one ``must`` condition on ``metadata.<field>``;
    ``filenames`` becomes a match-any condition on ``metadata.filename``.

    Returns:
        A ``Filter`` with all conditions under ``must``, or ``None`` when no
        condition could be built.
    """
    flat = filters_to_dict(filters)
    if not flat:
        return None

    def _condition(field: str, value: object) -> qmodels.FieldCondition | None:
        # Build the condition for one field; None means "contributes nothing".
        if value is None:
            return None
        if field == "filenames" and isinstance(value, list):
            names = [x for x in value if isinstance(x, str) and x]
            if not names:
                return None
            return qmodels.FieldCondition(
                key="metadata.filename", match=qmodels.MatchAny(any=names)
            )
        if isinstance(value, (str, int)):
            return qmodels.FieldCondition(
                key=f"metadata.{field}", match=qmodels.MatchValue(value=value)
            )
        return None

    built = (_condition(field, value) for field, value in flat.items())
    conditions = [cond for cond in built if cond is not None]
    if not conditions:
        return None
    return qmodels.Filter(must=conditions)
|
| 86 |
+
|
src/indexing.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Load PDFs, split into chunks with metadata, and index into Qdrant."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import hashlib
|
| 6 |
+
import uuid
|
| 7 |
+
from collections import defaultdict
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Protocol
|
| 10 |
+
|
| 11 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 12 |
+
from langchain_core.documents import Document
|
| 13 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 14 |
+
from loguru import logger
|
| 15 |
+
|
| 16 |
+
from src.config import settings
|
| 17 |
+
from src.schemas import ChunkMetadata
|
| 18 |
+
from src.store import ensure_collection, get_vector_store
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class Chunker(Protocol):
    """Structural interface for objects that can split documents into chunks."""

    def split_documents(self, documents: list[Document]) -> list[Document]:
        """Split page-level documents into chunk-level documents."""
        ...
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _splitter(
    chunk_size: int | None = None, chunk_overlap: int | None = None
) -> RecursiveCharacterTextSplitter:
    """Build the default recursive character splitter.

    Args:
        chunk_size: Forwarded as the splitter's ``chunk_size``; when omitted
            (or 0) ``settings.chunk_size`` is used.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``;
            ``settings.chunk_overlap`` is used only when this is ``None``.

    Returns:
        A splitter that tries paragraph, line, sentence, word, and finally
        character separators, and does not keep the separator.
    """
    size = chunk_size or settings.chunk_size
    # 0 is a legitimate overlap ("no overlap"), so only None falls back to the
    # configured default; `or` would silently discard an explicit 0.
    overlap = settings.chunk_overlap if chunk_overlap is None else chunk_overlap

    return RecursiveCharacterTextSplitter(
        chunk_size=size,
        chunk_overlap=overlap,
        separators=["\n\n", "\n", ". ", " ", ""],
        keep_separator=False,
    )
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _document_id(path: Path) -> str:
    """Return a 16-hex-character identifier for the file at ``path``.

    The identifier is the truncated SHA-1 of ``"<file name>:<size in bytes>"``;
    the file's contents are not read.
    """
    fingerprint = "{}:{}".format(path.name, path.stat().st_size)
    digest = hashlib.sha1(fingerprint.encode("utf-8"))
    return digest.hexdigest()[:16]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _chunk_id(doc_id: str, page: int, index: int) -> str:
    """Compose a chunk identifier of the form ``<doc_id>:<page>:<index>``."""
    return "{}:{}:{}".format(doc_id, page, index)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _load_pdf(path: Path) -> list[Document]:
    """Load a PDF with `PyPDFLoader` and replace each document's metadata.

    Every returned document gets exactly these keys: ``document_id``,
    ``filename``, ``source`` (resolved absolute path), ``page`` (the loader's
    ``page`` value plus one, defaulting to 1), and ``section`` (carried over
    from the loader's metadata when present, else ``None``).
    """
    pages = PyPDFLoader(str(path)).load()
    doc_id = _document_id(path)
    filename = path.name
    source = str(path.resolve())

    for page_doc in pages:
        loader_meta = page_doc.metadata
        page_doc.metadata = {
            "document_id": doc_id,
            "filename": filename,
            "source": source,
            # The loader's page value is shifted by one before storing.
            "page": int(loader_meta.get("page", 0)) + 1,
            "section": loader_meta.get("section"),
        }
    return pages
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def discover_pdfs(data_dir: Path | None = None) -> list[Path]:
    """Return the PDF files directly inside a directory, sorted by path.

    Args:
        data_dir: Directory to scan; defaults to ``settings.data_dir``.

    Returns:
        Sorted paths of regular files whose suffix is ``.pdf`` (case-insensitive).
        An empty list if the path is missing or is not a directory.
    """
    directory = data_dir or settings.data_dir
    # is_dir() covers a missing path too, and keeps iterdir() from raising
    # NotADirectoryError when the configured path points at a regular file.
    if not directory.is_dir():
        return []
    return sorted(p for p in directory.iterdir() if p.is_file() and p.suffix.lower() == ".pdf")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def build_chunks(
    pdf_paths: list[Path],
    chunk_size: int | None = None,
    chunk_overlap: int | None = None,
    chunker: Chunker | None = None,
) -> list[Document]:
    """Load the given PDFs, split them, and attach validated chunk metadata.

    Args:
        pdf_paths: PDF files to load.
        chunk_size: Passed to the default splitter when no ``chunker`` is given.
        chunk_overlap: Passed to the default splitter when no ``chunker`` is given.
        chunker: Custom splitter; when provided, the size/overlap arguments are unused.

    Returns:
        Chunk documents whose metadata is a dumped `ChunkMetadata`, including a
        ``chunk_id`` built from the document id, page, and a per-document
        running index.
    """
    page_docs: list[Document] = []
    for pdf in pdf_paths:
        logger.info("Loading PDF: {}", pdf.name)
        page_docs.extend(_load_pdf(pdf))

    splitter = chunker if chunker is not None else _splitter(chunk_size, chunk_overlap)
    chunks = splitter.split_documents(page_docs)

    # Next chunk index to hand out, tracked separately for every document.
    next_index: dict[str, int] = defaultdict(int)
    for chunk in chunks:
        raw = chunk.metadata
        doc_id = raw["document_id"]
        position = next_index[doc_id]
        next_index[doc_id] = position + 1
        chunk.metadata = ChunkMetadata(
            document_id=doc_id,
            filename=raw["filename"],
            source=raw["source"],
            page=raw["page"],
            chunk_id=_chunk_id(doc_id, raw["page"], position),
            section=raw.get("section"),
        ).model_dump()
    return chunks
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def index_chunks(chunks: list[Document], collection_name: str | None = None) -> int:
    """Add chunks to the vector store under deterministic UUIDs.

    Each point id is a UUIDv5 of the chunk's ``chunk_id`` metadata, so
    re-ingesting the same content upserts instead of creating duplicates.

    Returns:
        The number of chunks submitted (0 when ``chunks`` is empty, in which
        case the store is not touched).
    """
    if not chunks:
        return 0

    point_ids: list[str] = []
    for chunk in chunks:
        point_ids.append(str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk.metadata["chunk_id"])))

    store = get_vector_store(collection_name=collection_name)
    store.add_documents(chunks, ids=point_ids)
    return len(chunks)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def ingest(
    recreate: bool = False,
    collection_name: str | None = None,
    chunker: Chunker | None = None,
    chunk_size: int | None = None,
    chunk_overlap: int | None = None,
) -> int:
    """Index every PDF found in the configured data directory.

    Args:
        recreate: Forwarded to `ensure_collection`.
        collection_name: Target collection; ``None`` is passed through as-is.
        chunker: Optional custom splitter forwarded to `build_chunks`.
        chunk_size: Forwarded to `build_chunks`.
        chunk_overlap: Forwarded to `build_chunks`.

    Returns:
        Number of chunks indexed; 0 when no PDFs or no chunks were found.
    """
    pdfs = discover_pdfs()
    if not pdfs:
        logger.warning("No PDF files found in {}", settings.data_dir)
        return 0

    ensure_collection(recreate=recreate, collection_name=collection_name)
    chunks = build_chunks(
        pdfs,
        chunker=chunker,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    if chunks:
        indexed = index_chunks(chunks, collection_name=collection_name)
        logger.info("Ingested {} chunks from {} PDF(s)", indexed, len(pdfs))
        return indexed

    logger.warning("No chunks produced from {} PDF(s)", len(pdfs))
    return 0
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def save_and_ingest_pdf(file_bytes: bytes, filename: str) -> dict[str, object]:
    """Write an uploaded PDF into ``settings.data_dir`` and index it.

    Args:
        file_bytes: Raw content of the upload.
        filename: Client-supplied name; only its final path component is used.

    Returns:
        ``{"filename": <stored name>, "chunks_indexed": <count>}``.

    Raises:
        ValueError: If the name is empty, does not end in ``.pdf``, or the
            content is empty.
    """
    if not filename:
        raise ValueError("Filename is required.")
    looks_like_pdf = filename.lower().endswith(".pdf")
    if not looks_like_pdf:
        raise ValueError("Only PDF files are accepted.")
    if not file_bytes:
        raise ValueError("Uploaded file is empty.")

    # Keep only the last path component so the upload lands inside data_dir.
    safe_name = Path(filename).name
    data_dir = settings.data_dir
    data_dir.mkdir(parents=True, exist_ok=True)
    dest = data_dir / safe_name
    dest.write_bytes(file_bytes)
    logger.info("Saved uploaded PDF: {}", dest)

    ensure_collection(recreate=False)
    chunks = build_chunks([dest])

    indexed = 0
    if chunks:
        indexed = index_chunks(chunks)
        logger.info("Indexed {} chunks from {}", indexed, safe_name)
    else:
        logger.warning("No chunks produced for uploaded file {}", safe_name)
    return {"filename": safe_name, "chunks_indexed": indexed}
|
| 173 |
+
|
src/learning.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Grounded learning features: summarization, quiz, and flashcard generation."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
from loguru import logger
|
| 8 |
+
from pydantic import ValidationError
|
| 9 |
+
|
| 10 |
+
from src.config import settings
|
| 11 |
+
from src.llm import invoke_llm
|
| 12 |
+
from src.rag import fetch_all_chunks, format_citations, render_prompt, retrieve
|
| 13 |
+
from src.schemas import Flashcard, FlashcardSet, QuizItem, QuizSet, RetrievedChunk, Summary
|
| 14 |
+
|
| 15 |
+
# Prompt template filenames; the functions in this module pass them to render_prompt.
SUMMARY_SINGLE_TEMPLATE = "summary_single.jinja2"
|
| 16 |
+
SUMMARY_MAP_TEMPLATE = "summary_map.jinja2"
|
| 17 |
+
SUMMARY_REDUCE_TEMPLATE = "summary_reduce.jinja2"
|
| 18 |
+
QUIZ_TEMPLATE = "quiz.jinja2"
|
| 19 |
+
FLASHCARDS_TEMPLATE = "flashcards.jinja2"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _parse_json(text: str) -> dict | list:
    """Parse a JSON object/array from model output, tolerating a markdown code fence.

    Handles both the multi-line fence (```` ```json ```` on its own line) and a
    fence written on a single line together with the JSON payload.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON object or array.

    Raises:
        RuntimeError: If the text is not valid JSON, or decodes to a scalar.
    """
    cleaned = text.strip()
    if cleaned.startswith("```"):
        fenced = cleaned[3:].removesuffix("```")
        first_line, newline, remainder = fenced.partition("\n")
        if newline:
            # The opening-fence line holds only the optional language tag.
            fenced = remainder
        else:
            # Single-line fence: skip a language tag such as "json" by jumping
            # to the first JSON opener, if there is one.
            starts = [pos for pos in (first_line.find("{"), first_line.find("[")) if pos >= 0]
            if starts:
                fenced = first_line[min(starts):]
        cleaned = fenced.strip()

    try:
        obj = json.loads(cleaned)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Invalid JSON from model output: {cleaned}") from e

    if not isinstance(obj, (dict, list)):
        raise RuntimeError(f"Expected JSON object or array, got {type(obj).__name__}.")

    return obj
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _resolve_target(
    document: str | None,
    query: str | None,
    filters: dict[str, object] | None,
    k: int | None,
    retrieval_k: int,
) -> tuple[list[RetrievedChunk], str, str | None]:
    """Pick the chunks to work on and describe where they came from.

    Args:
        document: Filename to restrict to; overrides any ``filename`` in ``filters``.
        query: When given, chunks are retrieved by similarity search.
        filters: Extra metadata filters (copied, never mutated).
        k: Number of chunks to retrieve for a query; falls back to ``retrieval_k``.
        retrieval_k: Default retrieval size for the calling feature.

    Returns:
        ``(chunks, scope, target_label)`` where scope is ``"query"``,
        ``"document"``, ``"filter"``, or ``"corpus"``.
    """
    merged: dict[str, object] = dict(filters or {})
    if document:
        merged["filename"] = document

    if query:
        found = retrieve(query, k=k or retrieval_k, filters=merged)
        return found, "query", query

    if merged:
        found = fetch_all_chunks(filters=merged)
        label = ", ".join(f"{fk}={fv}" for fk, fv in merged.items())
        return found, ("document" if document else "filter"), label

    return fetch_all_chunks(filters=None), "corpus", None
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _validate_items(
    payload: object,
    key: str,
    model_class: type,
    dedup_field: str,
    label: str,
    valid_markers: set[str],
) -> list:
    """Validate, de-duplicate, and clean the generated items in ``payload[key]``.

    Entries that are not dicts are skipped; entries failing ``model_class``
    validation are logged and skipped. Items are de-duplicated on the
    lower-cased, trimmed ``dedup_field``, and each item's ``source_markers``
    is reduced to the markers present in ``valid_markers``.

    Raises:
        RuntimeError: If the payload is not a dict, ``payload[key]`` is not a
            list, or no item survives validation.
    """
    if not isinstance(payload, dict):
        raise RuntimeError(f"Expected JSON object for {label}.")
    raw_items = payload.get(key)
    if not isinstance(raw_items, list):
        raise RuntimeError(f"Expected '{key}' to be a list for {label}.")

    accepted: list = []
    seen_keys: set[str] = set()
    for entry in raw_items:
        if not isinstance(entry, dict):
            continue
        try:
            parsed = model_class.model_validate(entry)
        except ValidationError as exc:
            logger.warning("Dropping invalid {}: {}", label, exc)
            continue

        fingerprint = str(getattr(parsed, dedup_field, "")).strip().lower()
        if fingerprint and fingerprint not in seen_keys:
            seen_keys.add(fingerprint)
            # Keep only the markers that refer to a chunk actually shown to the model.
            kept = [marker for marker in parsed.source_markers if marker in valid_markers]
            accepted.append(parsed.model_copy(update={"source_markers": kept}))

    if accepted:
        return accepted
    raise RuntimeError(f"No valid {label} produced.")
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _validate_summary_payload(payload: object) -> tuple[str, list[str]]:
    """Check the shape of a summary payload and return its cleaned contents.

    Args:
        payload: Decoded JSON expected to look like
            ``{"summary": str, "key_points": [str, ...]}``.

    Returns:
        ``(summary, key_points)`` with surrounding whitespace removed and blank
        key points dropped.

    Raises:
        RuntimeError: If the payload is not a dict, ``summary`` is not a
            string, or ``key_points`` is not a list of strings.
    """
    if not isinstance(payload, dict):
        raise RuntimeError("Expected a JSON object for summary.")
    summary = payload.get("summary")
    key_points = payload.get("key_points")
    if key_points is None:
        # A missing key and an explicit JSON null both mean "no key points".
        key_points = []
    if not isinstance(summary, str):
        raise RuntimeError("Summary payload missing 'summary' string.")
    if not isinstance(key_points, list) or not all(isinstance(x, str) for x in key_points):
        raise RuntimeError("Summary payload 'key_points' must be a list of strings.")
    return summary.strip(), [kp.strip() for kp in key_points if kp.strip()]
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def summarize(
    document: str | None = None,
    query: str | None = None,
    filters: dict[str, object] | None = None,
    k: int | None = None,
) -> Summary:
    """Produce a grounded summary of the selected chunks.

    When the number of chunks exceeds ``settings.summarize_batch_size`` the
    chunks are summarized batch by batch (map) and the partial summaries are
    merged by one final call (reduce); otherwise a single call is made.

    Raises:
        RuntimeError: If no chunks are available, or the model output cannot be
            parsed or validated.
    """
    chunks, scope, target = _resolve_target(
        document=document,
        query=query,
        filters=filters,
        k=k,
        retrieval_k=settings.summarize_retrieval_k,
    )
    if not chunks:
        raise RuntimeError("No chunks available for summarization.")

    def _ask(template: str, **context: object) -> tuple[str, list[str]]:
        # Render one prompt, call the model, and validate the summary payload.
        rendered = render_prompt(template, **context)
        return _validate_summary_payload(_parse_json(invoke_llm(rendered)))

    batch_size = settings.summarize_batch_size
    if len(chunks) > batch_size:
        n_batches = (len(chunks) + batch_size - 1) // batch_size
        partials: list[dict] = []
        for number, offset in enumerate(range(0, len(chunks), batch_size), start=1):
            logger.info("Summarizing batch {}/{}", number, n_batches)
            part_text, part_points = _ask(
                SUMMARY_MAP_TEMPLATE, chunks=chunks[offset : offset + batch_size]
            )
            partials.append({"summary": part_text, "key_points": part_points})
        summary_text, key_points = _ask(SUMMARY_REDUCE_TEMPLATE, partials=partials)
    else:
        summary_text, key_points = _ask(SUMMARY_SINGLE_TEMPLATE, chunks=chunks)

    return Summary(
        scope=scope,
        target=target,
        summary=summary_text,
        key_points=key_points,
        citations=format_citations(chunks),
    )
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def generate_quiz(
    document: str | None = None,
    query: str | None = None,
    filters: dict[str, object] | None = None,
    count: int | None = None,
    k: int | None = None,
) -> QuizSet:
    """Generate a grounded multiple-choice quiz from the selected chunks.

    Args:
        document: Filename to restrict to.
        query: Optional query used to retrieve the chunks.
        filters: Extra metadata filters.
        count: Number of items requested; defaults to ``settings.quiz_default_count``.
        k: Retrieval size for a query; defaults to ``settings.generation_retrieval_k``.

    Raises:
        RuntimeError: If no chunks are available, or the model output cannot be
            parsed or yields no valid item.
    """
    chunks, scope, target = _resolve_target(
        document=document,
        query=query,
        filters=filters,
        k=k,
        retrieval_k=settings.generation_retrieval_k,
    )
    if not chunks:
        raise RuntimeError("No chunks available for quiz generation.")

    requested = count or settings.quiz_default_count
    # Markers S1..Sn mirror the numbering the prompt template gives the chunks.
    valid_markers = {f"S{position}" for position, _ in enumerate(chunks, start=1)}

    rendered = render_prompt(QUIZ_TEMPLATE, chunks=chunks, count=requested)
    raw_output = invoke_llm(rendered)
    items = _validate_items(
        _parse_json(raw_output), "items", QuizItem, "question", "quiz items", valid_markers
    )

    return QuizSet(
        scope=scope,
        target=target,
        items=items,
        citations=format_citations(chunks),
    )
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def generate_flashcards(
    document: str | None = None,
    query: str | None = None,
    filters: dict[str, object] | None = None,
    count: int | None = None,
    k: int | None = None,
) -> FlashcardSet:
    """Generate a grounded flashcard set from the selected chunks.

    Args:
        document: Filename to restrict to.
        query: Optional query used to retrieve the chunks.
        filters: Extra metadata filters.
        count: Number of cards requested; defaults to ``settings.flashcards_default_count``.
        k: Retrieval size for a query; defaults to ``settings.generation_retrieval_k``.

    Raises:
        RuntimeError: If no chunks are available, or the model output cannot be
            parsed or yields no valid card.
    """
    chunks, scope, target = _resolve_target(
        document=document,
        query=query,
        filters=filters,
        k=k,
        retrieval_k=settings.generation_retrieval_k,
    )
    if not chunks:
        raise RuntimeError("No chunks available for flashcard generation.")

    requested = count or settings.flashcards_default_count
    # Markers S1..Sn mirror the numbering the prompt template gives the chunks.
    valid_markers = {f"S{position}" for position, _ in enumerate(chunks, start=1)}

    rendered = render_prompt(FLASHCARDS_TEMPLATE, chunks=chunks, count=requested)
    raw_output = invoke_llm(rendered)
    cards = _validate_items(
        _parse_json(raw_output), "cards", Flashcard, "front", "flashcards", valid_markers
    )

    return FlashcardSet(
        scope=scope,
        target=target,
        cards=cards,
        citations=format_citations(chunks),
    )
|
| 227 |
+
|
src/llm.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM invocation through Gemini API (google-genai)."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from contextvars import ContextVar
|
| 6 |
+
|
| 7 |
+
from google import genai
|
| 8 |
+
from google.genai import types
|
| 9 |
+
|
| 10 |
+
from src.config import settings
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# API key supplied at runtime for the current context; None means "not set".
_runtime_api_key: ContextVar[str | None] = ContextVar("runtime_gemini_api_key", default=None)


def set_runtime_gemini_api_key(api_key: str | None) -> None:
    """Store a Gemini API key for the current context.

    A string is stored with surrounding whitespace removed; any non-string
    value (including ``None``) clears the key.
    """
    cleaned = api_key.strip() if isinstance(api_key, str) else None
    _runtime_api_key.set(cleaned)
|
| 17 |
+
|
| 18 |
+
def invoke_llm(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return its text.

    The runtime key set via `set_runtime_gemini_api_key` takes precedence over
    ``settings.gemini_api_key``.

    Args:
        prompt: Full prompt text.

    Returns:
        The response text with surrounding whitespace removed.

    Raises:
        RuntimeError: If no API key is available or the response text is empty.
    """
    api_key = _runtime_api_key.get() or settings.gemini_api_key
    if not api_key:
        raise RuntimeError("Missing Gemini API key. Please provide it in the UI (or set GEMINI_API_KEY).")

    generation_config = types.GenerateContentConfig(
        temperature=settings.llm_temperature,
        max_output_tokens=settings.llm_max_new_tokens,
    )
    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model=settings.llm_model,
        contents=prompt,
        config=generation_config,
    )

    answer = (response.text or "").strip()
    if answer:
        return answer
    raise RuntimeError("Empty response from Gemini.")
|
| 42 |
+
|
src/prompts/answer.jinja2
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a precise assistant. Answer the user's question using ONLY the context below.
|
| 2 |
+
|
| 3 |
+
Rules:
|
| 4 |
+
- Use only facts explicitly supported by the context. Do not invent details.
|
| 5 |
+
- If the context is insufficient, reply exactly: "Tôi không có đủ thông tin trong ngữ cảnh được cung cấp để trả lời."
|
| 6 |
+
- Be concise and direct.
|
| 7 |
+
- Write your answer in Vietnamese.
|
| 8 |
+
- Cite support inline using source markers like [S1], [S2].
|
| 9 |
+
- Use only the source markers provided in the context.
|
| 10 |
+
- Do not write filenames, page numbers, or chunk IDs in the answer body.
|
| 11 |
+
|
| 12 |
+
Context:
|
| 13 |
+
{% for c in chunks %}
|
| 14 |
+
---
|
| 15 |
+
[source={{ "S" ~ loop.index }}]
|
| 16 |
+
{{ c.text }}
|
| 17 |
+
{% endfor %}
|
| 18 |
+
|
| 19 |
+
Question: {{ question }}
|
| 20 |
+
|
| 21 |
+
Answer:
|
| 22 |
+
|
src/prompts/flashcards.jinja2
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are generating study flashcards grounded in the provided context.
|
| 2 |
+
|
| 3 |
+
Rules:
|
| 4 |
+
- Generate exactly {{ count }} flashcards.
|
| 5 |
+
- Fronts should ask for a concept, term, distinction, formula, process, or short explanation.
|
| 6 |
+
- Backs must be concise but complete enough for revision.
|
| 7 |
+
- Ground every card in the context. Do not invent facts.
|
| 8 |
+
- Avoid low-value cards that only restate section headings.
|
| 9 |
+
- Avoid duplicate or overly similar cards.
|
| 10 |
+
- Provide at least one [S#] source marker per card when possible.
|
| 11 |
+
- If the context is insufficient for {{ count }} useful cards, produce fewer rather than pad.
|
| 12 |
+
- Write all fronts, backs, hints, and topic labels in Vietnamese.
|
| 13 |
+
|
| 14 |
+
Output STRICTLY valid JSON and no other text, with this exact shape:
|
| 15 |
+
{
|
| 16 |
+
"cards": [
|
| 17 |
+
{
|
| 18 |
+
"front": "string",
|
| 19 |
+
"back": "string",
|
| 20 |
+
"hint": "optional string or null",
|
| 21 |
+
"topic": "optional short topic label or null",
|
| 22 |
+
"source_markers": ["S1"]
|
| 23 |
+
}
|
| 24 |
+
]
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
Context:
|
| 28 |
+
{% for c in chunks %}
|
| 29 |
+
---
|
| 30 |
+
[source=S{{ loop.index }}] ({{ c.metadata.filename }} p.{{ c.metadata.page }})
|
| 31 |
+
{{ c.text }}
|
| 32 |
+
{% endfor %}
|
| 33 |
+
|
| 34 |
+
Respond with ONLY the JSON object.
|
| 35 |
+
|
src/prompts/quiz.jinja2
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are generating high-quality multiple-choice quiz items grounded in the provided context.
|
| 2 |
+
|
| 3 |
+
Rules:
|
| 4 |
+
- Generate exactly {{ count }} items.
|
| 5 |
+
- Every question must be answerable from the context alone.
|
| 6 |
+
- Provide exactly 4 options per question with exactly one correct answer.
|
| 7 |
+
- Test understanding: concepts, distinctions, reasoning, and factual recall.
|
| 8 |
+
- Avoid duplicates or near-duplicates.
|
| 9 |
+
- Avoid trick questions or ambiguous wording.
|
| 10 |
+
- Explanations must be concise and cite at least one [S#] marker from the context.
|
| 11 |
+
- If the context is insufficient to create {{ count }} high-quality items, generate fewer. Never fabricate facts.
|
| 12 |
+
- Write all questions, options, explanations, and topic labels in Vietnamese.
|
| 13 |
+
|
| 14 |
+
Output STRICTLY valid JSON and no other text, with this exact shape:
|
| 15 |
+
{
|
| 16 |
+
"items": [
|
| 17 |
+
{
|
| 18 |
+
"question": "string",
|
| 19 |
+
"options": ["string", "string", "string", "string"],
|
| 20 |
+
"correct_index": 0,
|
| 21 |
+
"explanation": "grounded explanation with [S1] style citations",
|
| 22 |
+
"source_markers": ["S1"],
|
| 23 |
+
"difficulty": "easy|medium|hard",
|
| 24 |
+
"topic": "short topic label"
|
| 25 |
+
}
|
| 26 |
+
]
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
Context:
|
| 30 |
+
{% for c in chunks %}
|
| 31 |
+
---
|
| 32 |
+
[source=S{{ loop.index }}] ({{ c.metadata.filename }} p.{{ c.metadata.page }})
|
| 33 |
+
{{ c.text }}
|
| 34 |
+
{% endfor %}
|
| 35 |
+
|
| 36 |
+
Respond with ONLY the JSON object.
|
| 37 |
+
|
src/prompts/summary_map.jinja2
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are summarizing a portion of a learning document for later synthesis.
|
| 2 |
+
|
| 3 |
+
Rules:
|
| 4 |
+
- Summarize only what the passages actually say.
|
| 5 |
+
- Preserve concepts, definitions, processes, and reasoning.
|
| 6 |
+
- Do not add outside knowledge. Do not speculate.
|
| 7 |
+
- Keep it dense but readable; aim for 4-8 sentences.
|
| 8 |
+
- Write the summary and key points in Vietnamese.
|
| 9 |
+
|
| 10 |
+
Output STRICTLY valid JSON with this shape and no extra text:
|
| 11 |
+
{
|
| 12 |
+
"summary": "dense paragraph summary",
|
| 13 |
+
"key_points": ["fact", "fact"]
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
Passages:
|
| 17 |
+
{% for c in chunks %}
|
| 18 |
+
---
|
| 19 |
+
[source=S{{ loop.index }}] ({{ c.metadata.filename }} p.{{ c.metadata.page }})
|
| 20 |
+
{{ c.text }}
|
| 21 |
+
{% endfor %}
|
| 22 |
+
|
| 23 |
+
Respond with ONLY the JSON object.
|
| 24 |
+
|
src/prompts/summary_reduce.jinja2
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are synthesizing partial summaries of a learning document into one coherent study summary.
|
| 2 |
+
|
| 3 |
+
Rules:
|
| 4 |
+
- Use only information present in the partial summaries.
|
| 5 |
+
- Do not add outside knowledge. Do not invent details.
|
| 6 |
+
- Merge duplicated points; preserve distinctions between related concepts.
|
| 7 |
+
- Write for a learner who wants to understand and remember the material.
|
| 8 |
+
- Write the summary and key points in Vietnamese.
|
| 9 |
+
|
| 10 |
+
Output STRICTLY valid JSON with this shape and no extra text:
|
| 11 |
+
{
|
| 12 |
+
"summary": "A coherent multi-paragraph study summary.",
|
| 13 |
+
"key_points": ["concise learnable fact", "concise learnable fact"]
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
Partial summaries:
|
| 17 |
+
{% for p in partials %}
|
| 18 |
+
---
|
| 19 |
+
Summary: {{ p.summary }}
|
| 20 |
+
{% if p.key_points %}
|
| 21 |
+
Key points:
|
| 22 |
+
{% for kp in p.key_points %}
|
| 23 |
+
- {{ kp }}
|
| 24 |
+
{% endfor %}
|
| 25 |
+
{% endif %}
|
| 26 |
+
{% endfor %}
|
| 27 |
+
|
| 28 |
+
Respond with ONLY the JSON object.
|
| 29 |
+
|
src/prompts/summary_single.jinja2
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are writing a study-oriented summary grounded strictly in the provided context.
|
| 2 |
+
|
| 3 |
+
Rules:
|
| 4 |
+
- Use only facts explicitly supported by the context. Do not invent details.
|
| 5 |
+
- Do not add outside knowledge.
|
| 6 |
+
- Focus on concepts, definitions, relationships, and reasoning a learner should retain.
|
| 7 |
+
- Keep the tone clear, neutral, and practical for study.
|
| 8 |
+
- Write the summary and key points in Vietnamese.
|
| 9 |
+
- If the context is empty or unrelated, return an empty summary and an empty list of key points.
|
| 10 |
+
|
| 11 |
+
Output STRICTLY valid JSON with this shape and no extra text:
|
| 12 |
+
{
|
| 13 |
+
"summary": "A coherent multi-paragraph study summary.",
|
| 14 |
+
"key_points": ["concise learnable fact", "concise learnable fact"]
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
Context:
|
| 18 |
+
{% for c in chunks %}
|
| 19 |
+
---
|
| 20 |
+
[source=S{{ loop.index }}] ({{ c.metadata.filename }} p.{{ c.metadata.page }})
|
| 21 |
+
{{ c.text }}
|
| 22 |
+
{% endfor %}
|
| 23 |
+
|
| 24 |
+
Respond with ONLY the JSON object.
|
| 25 |
+
|
src/rag.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Retrieval, prompts, citations, and grounded answers."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from functools import lru_cache
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
from jinja2 import Environment, FileSystemLoader, StrictUndefined
|
| 9 |
+
|
| 10 |
+
from src.config import settings
|
| 11 |
+
from src.filters import filters_to_qdrant
|
| 12 |
+
from src.llm import invoke_llm
|
| 13 |
+
from src.schemas import ChunkMetadata, Citation, RagAnswer, RetrievedChunk
|
| 14 |
+
from src.store import get_vector_store, scroll_all
|
| 15 |
+
|
| 16 |
+
# Prompt templates are loaded from the ``prompts`` folder next to this module.
PROMPTS_DIR = Path(__file__).parent.joinpath("prompts")
# Template file rendered by ``answer`` to build the LLM prompt.
ANSWER_TEMPLATE = "answer.jinja2"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def retrieve(
    query: str,
    k: int | None = None,
    filters: dict[str, object] | None = None,
    collection_name: str | None = None,
) -> list[RetrievedChunk]:
    """Run a filtered similarity search and wrap each hit as a ``RetrievedChunk``.

    Args:
        query: Text to search for.
        k: Number of hits to request; ``settings.top_k`` is used when this is
            ``None`` (or any other falsy value such as ``0``).
        filters: Filter mapping, converted with ``filters_to_qdrant`` before
            being handed to the vector store.
        collection_name: Collection to search; forwarded to ``get_vector_store``.

    Returns:
        One ``RetrievedChunk`` per hit, in the order the store returned them.
    """
    vector_store = get_vector_store(collection_name=collection_name)
    scored_docs = vector_store.similarity_search_with_score(
        query=query,
        k=k or settings.top_k,
        filter=filters_to_qdrant(filters),
    )

    chunks: list[RetrievedChunk] = []
    for document, similarity in scored_docs:
        chunks.append(
            RetrievedChunk(
                text=document.page_content,
                score=float(similarity),
                metadata=ChunkMetadata(**document.metadata),
            )
        )
    return chunks
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def fetch_all_chunks(
    filters: dict[str, object] | None = None,
    collection_name: str | None = None,
) -> list[RetrievedChunk]:
    """Collect every stored chunk that matches ``filters``, in reading order.

    Points whose payload has no ``metadata`` or no ``page_content`` are
    skipped. Scrolled chunks have no similarity score, so ``score`` is 0.0.

    Args:
        filters: Filter mapping, converted with ``filters_to_qdrant``.
        collection_name: Collection to scroll; defaults to
            ``settings.qdrant_collection``.

    Returns:
        Chunks sorted by filename, then page, then the integer that follows
        the last ``":"`` in ``chunk_id``.

    Raises:
        ValueError: If a chunk id does not end in an integer.
    """

    def reading_order(chunk: RetrievedChunk) -> tuple[str, int, int]:
        info = chunk.metadata
        # The trailing ":<n>" part of the chunk id is used as the in-page position.
        _, _, ordinal = info.chunk_id.rpartition(":")
        return info.filename, info.page, int(ordinal)

    target = collection_name or settings.qdrant_collection
    collected: list[RetrievedChunk] = []
    for batch in scroll_all(target, scroll_filter=filters_to_qdrant(filters)):
        for record in batch:
            body = record.payload or {}
            raw_meta = body.get("metadata") or {}
            content = body.get("page_content") or ""
            if raw_meta and content:
                collected.append(
                    RetrievedChunk(
                        text=content,
                        score=0.0,
                        metadata=ChunkMetadata(**raw_meta),
                    )
                )
    return sorted(collected, key=reading_order)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
@lru_cache(maxsize=1)
def _jinja_env() -> Environment:
    """Build the Jinja environment for the prompts directory (cached after first call).

    ``StrictUndefined`` is used for undefined variables, autoescaping is off,
    and block tags are trimmed (``trim_blocks`` / ``lstrip_blocks``).
    """
    template_loader = FileSystemLoader(str(PROMPTS_DIR))
    environment = Environment(
        loader=template_loader,
        autoescape=False,
        undefined=StrictUndefined,
        trim_blocks=True,
        lstrip_blocks=True,
    )
    return environment
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def render_prompt(template_name: str, **context: object) -> str:
    """Load ``template_name`` from the prompts directory and render it with ``context``."""
    template = _jinja_env().get_template(template_name)
    return template.render(**context)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def format_citations(chunks: list[RetrievedChunk]) -> list[Citation]:
    """Turn retrieved chunks into numbered citations.

    Numbering starts at 1 and follows the order of ``chunks``; the marker for
    position ``n`` is ``"S<n>"``.
    """
    citations: list[Citation] = []
    for position, chunk in enumerate(chunks, start=1):
        meta = chunk.metadata
        citations.append(
            Citation(
                source_index=position,
                source_marker=f"S{position}",
                filename=meta.filename,
                page=meta.page,
                section=meta.section,
                chunk_id=meta.chunk_id,
            )
        )
    return citations
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def answer(
    question: str,
    k: int | None = None,
    filters: dict[str, object] | None = None,
    collection_name: str | None = None,
) -> RagAnswer:
    """Answer ``question`` from retrieved chunks, with citations.

    Args:
        question: The user's question; also used as the retrieval query.
        k: Number of chunks to retrieve (forwarded to ``retrieve``).
        filters: Filter mapping (forwarded to ``retrieve``).
        collection_name: Collection to search (forwarded to ``retrieve``).

    Returns:
        A ``RagAnswer`` with the stripped LLM output, one citation per
        retrieved chunk, and the chunks themselves. When nothing is
        retrieved, the LLM is not called and a fixed fallback answer is
        returned without citations or chunks.
    """
    retrieved = retrieve(question, k=k, filters=filters, collection_name=collection_name)

    if retrieved:
        rendered = render_prompt(ANSWER_TEMPLATE, question=question, chunks=retrieved)
        reply = invoke_llm(rendered)
        return RagAnswer(
            question=question,
            answer=reply.strip(),
            citations=format_citations(retrieved),
            chunks=retrieved,
        )

    # No context available: return the fixed "not enough information" message.
    return RagAnswer(
        question=question,
        answer="Tôi không có đủ thông tin trong ngữ cảnh được cung cấp để trả lời.",
    )
|
| 119 |
+
|
src/schemas.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic schemas for chunks, answers, and learning outputs."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Literal
|
| 6 |
+
|
| 7 |
+
from pydantic import BaseModel, Field, model_validator
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class ChunkMetadata(BaseModel):
    """Metadata record stored alongside each chunk in Qdrant.

    Only ``section`` is optional; every other field is required.
    """

    document_id: str
    filename: str
    source: str
    page: int
    chunk_id: str
    section: str | None = None
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class RetrievedChunk(BaseModel):
    """Chunk text paired with its retrieval score and its ``ChunkMetadata``."""

    text: str
    score: float
    metadata: ChunkMetadata
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class Citation(BaseModel):
    """Reference to one source chunk, built from that chunk's metadata.

    ``source_index`` and ``source_marker`` identify the citation;
    ``section`` and ``chunk_id`` are optional.
    """

    source_index: int
    source_marker: str
    filename: str
    page: int
    section: str | None = None
    chunk_id: str | None = None
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class RagAnswer(BaseModel):
    """Grounded answer handed back to the caller.

    ``citations`` and ``chunks`` default to empty lists.
    """

    question: str
    answer: str
    citations: list[Citation] = Field(default_factory=list)
    chunks: list[RetrievedChunk] = Field(default_factory=list)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class Summary(BaseModel):
    """Study-oriented summary of a document or subset, with key points and citations.

    ``scope`` must be one of ``"query"``, ``"document"``, ``"filter"`` or
    ``"corpus"``; ``target`` is optional.
    """

    scope: Literal["query", "document", "filter", "corpus"]
    target: str | None = None
    summary: str
    key_points: list[str] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class QuizItem(BaseModel):
    """One multiple-choice question with exactly four options.

    ``correct_index`` is a zero-based position into ``options`` and is
    validated after the model is built.
    """

    question: str
    options: list[str] = Field(min_length=4, max_length=4)
    correct_index: int
    explanation: str
    source_markers: list[str] = Field(default_factory=list)
    difficulty: str | None = None
    topic: str | None = None

    @model_validator(mode="after")
    def _validate_correct_index(self) -> "QuizItem":
        """Reject a ``correct_index`` that does not point at one of the options."""
        option_count = len(self.options)
        if self.correct_index < 0 or self.correct_index >= option_count:
            raise ValueError(
                f"correct_index {self.correct_index} out of range for {option_count} options"
            )
        return self
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class QuizSet(BaseModel):
    """Collection of quiz items together with the citations they refer to.

    ``scope`` must be one of ``"query"``, ``"document"``, ``"filter"`` or
    ``"corpus"``; ``items`` and ``citations`` default to empty lists.
    """

    scope: Literal["query", "document", "filter", "corpus"]
    target: str | None = None
    items: list[QuizItem] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class Flashcard(BaseModel):
    """One study flashcard: a required front and back, plus optional extras.

    ``hint`` and ``topic`` are optional; ``source_markers`` defaults to an
    empty list.
    """

    front: str
    back: str
    hint: str | None = None
    topic: str | None = None
    source_markers: list[str] = Field(default_factory=list)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class FlashcardSet(BaseModel):
    """Collection of flashcards together with the citations they refer to.

    ``scope`` must be one of ``"query"``, ``"document"``, ``"filter"`` or
    ``"corpus"``; ``cards`` and ``citations`` default to empty lists.
    """

    scope: Literal["query", "document", "filter", "corpus"]
    target: str | None = None
    cards: list[Flashcard] = Field(default_factory=list)
    citations: list[Citation] = Field(default_factory=list)
|
| 105 |
+
|
src/store.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Embeddings, Qdrant client, collection setup, and vector store."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from collections.abc import Iterator
|
| 6 |
+
from functools import lru_cache
|
| 7 |
+
|
| 8 |
+
from langchain_qdrant import QdrantVectorStore
|
| 9 |
+
from qdrant_client import QdrantClient
|
| 10 |
+
from qdrant_client.http import models as qmodels
|
| 11 |
+
|
| 12 |
+
from src.config import settings
|
| 13 |
+
from src.embeddings import get_embeddings
|
| 14 |
+
|
| 15 |
+
# Default number of points requested per scroll call in ``scroll_all``.
_SCROLL_PAGE_SIZE = 256

# Payload fields that ``ensure_collection`` indexes, mapped to the schema type
# each index is required to have.
INDEXED_PAYLOAD_FIELDS = {
    "metadata.document_id": qmodels.PayloadSchemaType.KEYWORD,
    "metadata.filename": qmodels.PayloadSchemaType.KEYWORD,
    "metadata.page": qmodels.PayloadSchemaType.INTEGER,
}
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def close_client() -> None:
    """Close the cached Qdrant client, if one was ever created, and drop it from the cache."""
    has_cached_client = get_client.cache_info().currsize > 0
    if has_cached_client:
        # The cache is populated, so this call returns the existing client
        # rather than constructing a new one.
        get_client().close()
        get_client.cache_clear()
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@lru_cache(maxsize=1)
def get_client() -> QdrantClient:
    """Create (once) and return the local Qdrant client stored under ``settings.storage_dir``.

    The storage directory is created first if it does not exist.
    """
    storage_path = settings.storage_dir
    storage_path.mkdir(parents=True, exist_ok=True)
    return QdrantClient(path=str(storage_path))
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def ensure_collection(recreate: bool = False, collection_name: str | None = None) -> None:
    """Make sure the collection and its payload indexes exist.

    Args:
        recreate: When true and the collection exists, delete it and create
            it again from scratch.
        collection_name: Collection to prepare; defaults to
            ``settings.qdrant_collection``.

    Raises:
        ValueError: If an index listed in ``INDEXED_PAYLOAD_FIELDS`` already
            exists with a different schema type than expected.
    """
    qdrant = get_client()
    target = collection_name or settings.qdrant_collection

    already_there = qdrant.collection_exists(target)
    if already_there and recreate:
        qdrant.delete_collection(target)
        already_there = False

    if not already_there:
        # Embed a throwaway string to learn the vector size of the embedding model.
        probe_vector = get_embeddings().embed_query("dimension probe")
        vector_params = qmodels.VectorParams(
            size=len(probe_vector),
            distance=qmodels.Distance.COSINE,
        )
        qdrant.create_collection(collection_name=target, vectors_config=vector_params)

    current_indexes = qdrant.get_collection(target).payload_schema or {}

    for field_name, wanted_schema in INDEXED_PAYLOAD_FIELDS.items():
        index_info = current_indexes.get(field_name)
        if index_info is not None:
            # Index already present: only verify that its type is the expected one.
            found_schema = getattr(index_info, "data_type", None)
            if found_schema != wanted_schema:
                raise ValueError(
                    f"Payload index for '{field_name}' has schema "
                    f"{found_schema!r}, expected {wanted_schema!r}."
                )
            continue

        qdrant.create_payload_index(
            collection_name=target,
            field_name=field_name,
            field_schema=wanted_schema,
        )
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def scroll_all(
    collection_name: str,
    scroll_filter: qmodels.Filter | None = None,
    with_payload: bool | list[str] = True,
    limit: int = _SCROLL_PAGE_SIZE,
) -> Iterator[list]:
    """Iterate over a collection page by page, without fetching vectors.

    Args:
        collection_name: Collection to scroll.
        scroll_filter: Optional Qdrant filter applied to the scroll.
        with_payload: Passed through to the client's ``scroll`` call.
        limit: Maximum number of points per page.

    Yields:
        One list of points per scroll call, until no next offset is returned.
        If the client raises a ``ValueError`` whose message contains
        "not found", iteration simply ends.

    Raises:
        ValueError: Any other ``ValueError`` raised by the client.
    """
    qdrant = get_client()
    cursor = None
    exhausted = False
    while not exhausted:
        try:
            batch, cursor = qdrant.scroll(
                collection_name=collection_name,
                scroll_filter=scroll_filter,
                limit=limit,
                offset=cursor,
                with_payload=with_payload,
                with_vectors=False,
            )
        except ValueError as err:
            # Local Qdrant raises ValueError when collection doesn't exist yet.
            if "not found" not in str(err).lower():
                raise
            return
        yield batch
        exhausted = cursor is None
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def get_vector_store(collection_name: str | None = None) -> QdrantVectorStore:
    """Build a ``QdrantVectorStore`` on the shared client and embeddings.

    ``collection_name`` defaults to ``settings.qdrant_collection``.
    """
    qdrant = get_client()
    target = collection_name or settings.qdrant_collection
    embedder = get_embeddings()
    return QdrantVectorStore(client=qdrant, collection_name=target, embedding=embedder)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def list_documents(collection_name: str | None = None) -> list[dict[str, object]]:
    """List indexed documents with filename, document_id, pages, and chunk counts.

    Args:
        collection_name: Collection to scan. Defaults to
            ``settings.qdrant_collection`` when omitted, like the other
            helpers in this module that accept a collection name.

    Returns:
        One entry per filename matching the API `DocumentInfo` shape, sorted
        by filename. Each entry has ``filename``, ``document_id`` (the first
        id seen for that filename), ``pages`` (sorted distinct page numbers),
        ``page_count`` and ``chunk_count``. Points without a filename, a
        document id, or an integer page are ignored.
    """
    name = collection_name or settings.qdrant_collection

    pages_map: dict[str, set[int]] = {}
    doc_id_map: dict[str, str] = {}
    count_map: dict[str, int] = {}

    # Only the metadata payload is needed, so ask Qdrant for that key alone.
    for batch in scroll_all(name, with_payload=["metadata"]):
        for point in batch:
            meta = (point.payload or {}).get("metadata") or {}
            filename = meta.get("filename")
            document_id = meta.get("document_id")
            pg = meta.get("page")
            if not filename or not document_id or not isinstance(pg, int):
                continue
            fn = str(filename)
            doc_id_map.setdefault(fn, str(document_id))
            pages_map.setdefault(fn, set()).add(pg)
            count_map[fn] = count_map.get(fn, 0) + 1

    return [
        {
            "filename": fn,
            "document_id": doc_id_map[fn],
            "pages": sorted(pages_map[fn]),
            "page_count": len(pages_map[fn]),
            "chunk_count": count_map[fn],
        }
        for fn in sorted(doc_id_map)
    ]
|
| 152 |
+
|
static/aivn_logo.png
ADDED
|
static/style.css
ADDED
|
@@ -0,0 +1,567 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* ==========================================================
|
| 2 |
+
RAG Learning System — Design Tokens
|
| 3 |
+
========================================================== */
|
| 4 |
+
:root, #gradio-app {
|
| 5 |
+
--c-bg: #eef1fb;
|
| 6 |
+
--c-bg-2: #e4e9f7;
|
| 7 |
+
--c-surface: #ffffff;
|
| 8 |
+
--c-surface-2: #f2f5ff;
|
| 9 |
+
--c-surface-tint: rgba(255, 255, 255, 0.72);
|
| 10 |
+
|
| 11 |
+
--c-primary: #3d5af1;
|
| 12 |
+
--c-primary-dk: #2945d4;
|
| 13 |
+
--c-primary-pale: #dde4fd;
|
| 14 |
+
|
| 15 |
+
--c-accent: #6c3de0;
|
| 16 |
+
|
| 17 |
+
--c-text: #181c2e;
|
| 18 |
+
--c-text-2: #3d4460;
|
| 19 |
+
--c-text-muted: #6370a0;
|
| 20 |
+
|
| 21 |
+
/* Border needs to be darker than background for legibility */
|
| 22 |
+
--c-border: #b6c0e6;
|
| 23 |
+
--c-border-strong:#9faee0;
|
| 24 |
+
|
| 25 |
+
--c-note-bg: #fffbeb;
|
| 26 |
+
--c-note-bd: #f5c842;
|
| 27 |
+
--c-note-txt: #7a5200;
|
| 28 |
+
|
| 29 |
+
--c-status-bg: #f0f4ff;
|
| 30 |
+
|
| 31 |
+
--r-lg: 16px;
|
| 32 |
+
--r-md: 12px;
|
| 33 |
+
--r-sm: 8px;
|
| 34 |
+
--r-xs: 6px;
|
| 35 |
+
|
| 36 |
+
--shadow-card: 0 3px 16px rgba(40,55,130,.12), 0 1px 5px rgba(40,55,130,.07);
|
| 37 |
+
--shadow-panel: 0 6px 24px rgba(40,55,130,.10), 0 2px 8px rgba(40,55,130,.06);
|
| 38 |
+
--shadow-btn: 0 4px 14px rgba(61,90,241,.30);
|
| 39 |
+
--shadow-foc: 0 0 0 3px rgba(61,90,241,.20);
|
| 40 |
+
|
| 41 |
+
--app-max-width: 1440px;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
/* ==========================================================
|
| 45 |
+
Gradio variable bridge — all blocks transparent by default
|
| 46 |
+
========================================================== */
|
| 47 |
+
#gradio-app {
|
| 48 |
+
--background-fill-primary: var(--c-bg);
|
| 49 |
+
--background-fill-secondary: var(--c-bg-2);
|
| 50 |
+
--block-background-fill: transparent;
|
| 51 |
+
--block-border-color: transparent;
|
| 52 |
+
--block-border-width: 0px;
|
| 53 |
+
--input-background-fill: var(--c-surface);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/* ==========================================================
|
| 57 |
+
Page shell
|
| 58 |
+
========================================================== */
|
| 59 |
+
html, body {
|
| 60 |
+
margin: 0; padding: 0;
|
| 61 |
+
background: linear-gradient(155deg, var(--c-bg) 0%, var(--c-bg-2) 100%) !important;
|
| 62 |
+
color: var(--c-text);
|
| 63 |
+
min-height: 100vh;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
/* strip only safe wrappers, NOT .column / .row (Gradio needs those for flex) */
|
| 67 |
+
#gradio-app,
|
| 68 |
+
.gradio-container,
|
| 69 |
+
.main,
|
| 70 |
+
.contain,
|
| 71 |
+
.html-container {
|
| 72 |
+
background: transparent !important;
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
.gradio-container {
|
| 76 |
+
max-width: var(--app-max-width) !important;
|
| 77 |
+
margin: 0 auto !important;
|
| 78 |
+
padding: 12px 18px 10px !important;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
/* ==========================================================
|
| 82 |
+
Header
|
| 83 |
+
========================================================== */
|
| 84 |
+
.header-row {
|
| 85 |
+
gap: 14px !important;
|
| 86 |
+
padding: 4px 0 10px !important;
|
| 87 |
+
align-items: center !important;
|
| 88 |
+
background: transparent !important;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.header-row img {
|
| 92 |
+
height: 96px;
|
| 93 |
+
width: auto;
|
| 94 |
+
object-fit: contain;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
.header-meta {
|
| 98 |
+
display: flex;
|
| 99 |
+
flex-direction: column;
|
| 100 |
+
justify-content: center;
|
| 101 |
+
min-width: 0;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
.header-title {
|
| 105 |
+
margin: 0;
|
| 106 |
+
font-size: 1.8rem;
|
| 107 |
+
line-height: 1.15;
|
| 108 |
+
font-weight: 800;
|
| 109 |
+
color: var(--c-text);
|
| 110 |
+
letter-spacing: -0.025em;
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
.header-sub {
|
| 114 |
+
margin: 5px 0 0;
|
| 115 |
+
font-size: 0.95rem;
|
| 116 |
+
color: var(--c-text-muted);
|
| 117 |
+
font-weight: 500;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
/* ==========================================================
|
| 121 |
+
Info card (warning note)
|
| 122 |
+
========================================================== */
|
| 123 |
+
.info-card {
|
| 124 |
+
background: var(--c-note-bg) !important;
|
| 125 |
+
border: 1px solid var(--c-note-bd) !important;
|
| 126 |
+
border-left: 4px solid var(--c-note-bd) !important;
|
| 127 |
+
border-radius: var(--r-md);
|
| 128 |
+
padding: 10px 14px;
|
| 129 |
+
margin: 0 0 10px;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.info-card-title,
|
| 133 |
+
.info-card-list,
|
| 134 |
+
.info-card-list li { color: var(--c-note-txt) !important; }
|
| 135 |
+
|
| 136 |
+
.info-card-title { margin-bottom: 6px; font-size: 0.95rem; font-weight: 700; }
|
| 137 |
+
|
| 138 |
+
.info-card-list {
|
| 139 |
+
margin: 0;
|
| 140 |
+
padding-left: 18px;
|
| 141 |
+
font-size: 0.90rem;
|
| 142 |
+
line-height: 1.65;
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
.info-card-list li + li { margin-top: 3px; }
|
| 146 |
+
|
| 147 |
+
/* ==========================================================
|
| 148 |
+
2-Column main layout — EXPLICIT flex, do not rely solely on Gradio
|
| 149 |
+
========================================================== */
|
| 150 |
+
.main-layout {
|
| 151 |
+
display: flex !important;
|
| 152 |
+
flex-direction: row !important;
|
| 153 |
+
flex-wrap: wrap !important;
|
| 154 |
+
gap: 12px !important;
|
| 155 |
+
align-items: flex-start !important;
|
| 156 |
+
background: transparent !important;
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
.control-stack {
|
| 160 |
+
display: flex !important;
|
| 161 |
+
flex-direction: column !important;
|
| 162 |
+
gap: 10px !important;
|
| 163 |
+
flex: 4 1 320px !important;
|
| 164 |
+
min-width: 300px !important;
|
| 165 |
+
background: var(--c-surface-tint) !important;
|
| 166 |
+
border: 1.5px solid var(--c-border-strong) !important;
|
| 167 |
+
border-radius: var(--r-lg) !important;
|
| 168 |
+
box-shadow: var(--shadow-panel) !important;
|
| 169 |
+
padding: 14px 16px !important;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.preview-col {
|
| 173 |
+
display: flex !important;
|
| 174 |
+
flex-direction: column !important;
|
| 175 |
+
gap: 10px !important;
|
| 176 |
+
flex: 7 1 480px !important;
|
| 177 |
+
min-width: 420px !important;
|
| 178 |
+
background: var(--c-surface-tint) !important;
|
| 179 |
+
border: 1.5px solid var(--c-border-strong) !important;
|
| 180 |
+
border-radius: var(--r-lg) !important;
|
| 181 |
+
box-shadow: var(--shadow-panel) !important;
|
| 182 |
+
padding: 14px 16px !important;
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
/* ==========================================================
|
| 186 |
+
Cards (control-card)
|
| 187 |
+
— These are gr.Group, which Gradio renders as .block.gr-group
|
| 188 |
+
— We give them explicit white surface + shadow
|
| 189 |
+
========================================================== */
|
| 190 |
+
.control-card {
|
| 191 |
+
background: var(--c-surface) !important;
|
| 192 |
+
border: 1.5px solid var(--c-border-strong) !important;
|
| 193 |
+
border-radius: var(--r-lg) !important;
|
| 194 |
+
box-shadow: var(--shadow-card) !important;
|
| 195 |
+
padding: 14px 16px !important;
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
/* accent top stripe */
|
| 199 |
+
.control-card { border-top: 3px solid var(--c-primary) !important; }
|
| 200 |
+
|
| 201 |
+
/* strip nested .block inside cards — theme transparent already handles most,
|
| 202 |
+
but belt-and-suspenders for sub-blocks that may carry inline styles */
|
| 203 |
+
.control-card .block {
|
| 204 |
+
background: transparent !important;
|
| 205 |
+
border: none !important;
|
| 206 |
+
box-shadow: none !important;
|
| 207 |
+
padding: 0 !important;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
/* ==========================================================
|
| 211 |
+
Typography
|
| 212 |
+
========================================================== */
|
| 213 |
+
label, .gr-label, .block-title {
|
| 214 |
+
color: var(--c-text-2) !important;
|
| 215 |
+
font-size: 0.93rem !important;
|
| 216 |
+
font-weight: 600 !important;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.help-markdown p,
|
| 220 |
+
.help-markdown li,
|
| 221 |
+
.doc-summary,
|
| 222 |
+
.doc-summary p,
|
| 223 |
+
.doc-summary li {
|
| 224 |
+
font-size: 0.91rem;
|
| 225 |
+
line-height: 1.65;
|
| 226 |
+
color: var(--c-text-2) !important;
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
/* standalone markdown outside cards (doc_list_md etc.) */
|
| 230 |
+
.prose p,
|
| 231 |
+
.prose li,
|
| 232 |
+
.prose h1, .prose h2, .prose h3 {
|
| 233 |
+
color: var(--c-text) !important;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
/* ==========================================================
|
| 237 |
+
Inputs & textareas
|
| 238 |
+
========================================================== */
|
| 239 |
+
textarea, input:not([type="range"]):not([type="checkbox"]) {
|
| 240 |
+
background: var(--c-surface) !important;
|
| 241 |
+
color: var(--c-text) !important;
|
| 242 |
+
font-weight: 500 !important;
|
| 243 |
+
border: 1.5px solid var(--c-border) !important;
|
| 244 |
+
border-radius: var(--r-sm) !important;
|
| 245 |
+
box-shadow: none !important;
|
| 246 |
+
transition: border-color 0.15s, box-shadow 0.15s !important;
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
select {
|
| 250 |
+
background: var(--c-surface) !important;
|
| 251 |
+
color: var(--c-text) !important;
|
| 252 |
+
font-weight: 600 !important;
|
| 253 |
+
border: 1.5px solid var(--c-border) !important;
|
| 254 |
+
border-radius: var(--r-sm) !important;
|
| 255 |
+
box-shadow: none !important;
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
select:focus {
|
| 259 |
+
border-color: var(--c-primary) !important;
|
| 260 |
+
box-shadow: var(--shadow-foc) !important;
|
| 261 |
+
outline: none !important;
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
textarea:focus, input:focus {
|
| 265 |
+
border-color: var(--c-primary) !important;
|
| 266 |
+
box-shadow: var(--shadow-foc) !important;
|
| 267 |
+
outline: none !important;
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
::placeholder { color: var(--c-text-muted) !important; opacity: 1 !important; }
|
| 271 |
+
|
| 272 |
+
/* ==========================================================
|
| 273 |
+
Gradio form wrappers — remove default gray panels
|
| 274 |
+
========================================================== */
|
| 275 |
+
/* Many components render with fieldset/panel backgrounds that ignore theme vars */
|
| 276 |
+
.gradio-container fieldset,
|
| 277 |
+
.gradio-container .fieldset,
|
| 278 |
+
.gradio-container .panel,
|
| 279 |
+
.gradio-container .gr-panel,
|
| 280 |
+
.gradio-container .form,
|
| 281 |
+
.gradio-container .gr-form,
|
| 282 |
+
.gradio-container .wrap {
|
| 283 |
+
background: transparent !important;
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
/* CheckboxGroup / RadioGroup option panels */
|
| 287 |
+
.gradio-container fieldset {
|
| 288 |
+
border: 1.5px solid var(--c-border-strong) !important;
|
| 289 |
+
border-radius: var(--r-md) !important;
|
| 290 |
+
padding: 12px 12px 10px !important;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
.gradio-container fieldset legend {
|
| 294 |
+
color: var(--c-text-2) !important;
|
| 295 |
+
font-weight: 700 !important;
|
| 296 |
+
padding: 0 8px !important;
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
/* Make option rows readable and consistent */
|
| 300 |
+
.gradio-container input[type="checkbox"] {
|
| 301 |
+
accent-color: var(--c-primary);
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
/* Dropdown wrapper surfaces */
|
| 305 |
+
.gradio-container .gr-dropdown,
|
| 306 |
+
.gradio-container .gr-checkboxgroup,
|
| 307 |
+
.gradio-container .gr-radiogroup,
|
| 308 |
+
.gradio-container .gr-slider,
|
| 309 |
+
.gradio-container .gr-text-input,
|
| 310 |
+
.gradio-container .gr-textbox,
|
| 311 |
+
.gradio-container .gr-code {
|
| 312 |
+
background: transparent !important;
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
/* Tab content area sometimes renders as a gray block */
|
| 316 |
+
.tabs,
|
| 317 |
+
.tabs .tabitem,
|
| 318 |
+
.tabs .tabitem > .block,
|
| 319 |
+
.tabs .tabitem > .wrap,
|
| 320 |
+
.tabs .tabitem > .panel {
|
| 321 |
+
background: transparent !important;
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
/* ==========================================================
   Slider
   ========================================================== */

/* Native range input: drop the frame and tint it with the primary colour. */
input[type="range"] {
  accent-color: var(--c-primary);
  border: none !important;
  box-shadow: none !important;
}
|
| 332 |
+
|
| 333 |
+
/* ==========================================================
   Primary button (.gen-btn)
   ========================================================== */

/* Resting state: full-width gradient button. */
.gen-btn {
  /* box */
  width: 100% !important;
  height: 46px !important;
  border: none !important;
  border-radius: var(--r-md) !important;

  /* text */
  color: #fff !important;
  font-size: 15px !important;
  font-weight: 700 !important;

  /* surface */
  background: linear-gradient(135deg, #5771f5 0%, var(--c-primary) 55%, var(--c-primary-dk) 100%) !important;
  box-shadow: var(--shadow-btn) !important;

  /* interaction */
  cursor: pointer !important;
  transition: opacity 0.14s, box-shadow 0.14s, transform 0.10s !important;
}

/* Hover: lift by 1px, fade slightly, stronger shadow. */
.gen-btn:hover {
  transform: translateY(-1px) !important;
  opacity: 0.88 !important;
  box-shadow: 0 6px 22px rgba(61,90,241,.38) !important;
}

/* Pressed: back to the resting position at full opacity. */
.gen-btn:active {
  opacity: 1 !important;
  transform: translateY(0) !important;
}
|
| 360 |
+
|
| 361 |
+
/* Secondary buttons: every <button> that is not the primary .gen-btn.
   NOTE(review): the selector is not scoped to .gradio-container, so it
   matches all other buttons on the page; confirm that is intended. */
button:not(.gen-btn) {
  /* surface */
  background: var(--c-surface-2) !important;
  border: 1.5px solid var(--c-border) !important;
  border-radius: var(--r-sm) !important;

  /* text */
  color: var(--c-text-2) !important;
  font-size: 0.92rem !important;
  font-weight: 600 !important;

  transition: background 0.14s, border-color 0.14s, color 0.14s !important;
}

/* Hover: switch to the primary-tinted palette. */
button:not(.gen-btn):hover {
  color: var(--c-primary-dk) !important;
  border-color: var(--c-primary) !important;
  background: var(--c-primary-pale) !important;
}
|
| 377 |
+
|
| 378 |
+
/* Tab navigation buttons. These must win over the secondary-button rules,
   hence the extra `:not(.gen-btn)` variant of the selector, which raises
   specificity without changing what is matched. */
.tabs > .tab-nav button,
.tabs > .tab-nav button:not(.gen-btn) {
  /* Order matters: reset the whole border first, then re-add only the
     (initially transparent) bottom edge used as the selection indicator. */
  border: none !important;
  border-bottom: 2px solid transparent !important;
  border-radius: var(--r-xs) var(--r-xs) 0 0 !important;
  margin-bottom: -2px !important;
  padding: 9px 16px !important;

  background: transparent !important;
  box-shadow: none !important;

  color: var(--c-text-muted) !important;
  font-size: 0.93rem !important;
  font-weight: 600 !important;

  transition: color 0.14s, border-color 0.14s, background 0.14s !important;
}

/* Selected tab, matched by either the class or the ARIA state. */
.tabs > .tab-nav button[aria-selected="true"],
.tabs > .tab-nav button.selected {
  background: var(--c-primary-pale) !important;
  border-bottom-color: var(--c-primary) !important;
  color: var(--c-primary) !important;
}

/* Hover feedback for tabs that are not currently selected. */
.tabs > .tab-nav button:not(.selected):not([aria-selected="true"]):hover {
  background: var(--c-surface-2) !important;
  color: var(--c-text-2) !important;
}
|
| 405 |
+
|
| 406 |
+
/* ==========================================================
   Status bar
   ========================================================== */

/* Boxed status line with a primary-coloured left edge. */
.status-bar {
  padding: 9px 12px !important;
  background: var(--c-status-bg) !important;

  /* Order matters: border-left overrides the left edge set by `border`. */
  border: 1.5px solid var(--c-border) !important;
  border-left: 3px solid var(--c-primary) !important;
  border-radius: var(--r-sm) !important;

  color: var(--c-text-2) !important;
  font-size: 0.90rem !important;
  line-height: 1.5 !important;
}
|
| 419 |
+
|
| 420 |
+
/* ==========================================================
   Accordion
   ========================================================== */

/* Accordion container and its inner wrappers: flat, bordered surface. */
.gradio-accordion,
.gradio-accordion > div,
.gradio-accordion details {
  background: var(--c-surface-2) !important;
  border: 1.5px solid var(--c-border) !important;
  border-radius: var(--r-md) !important;
  box-shadow: none !important;
  overflow: hidden !important;
}

/* Accordion header (summary / toggle button).
   `.gen-btn` is excluded so that a primary button placed inside an accordion
   keeps its own gradient, colour, shadow and padding. This matches the hover
   rule below, which already carried the same exclusion. */
.gradio-accordion summary,
.gradio-accordion button:not(.gen-btn) {
  background: var(--c-surface-2) !important;
  color: var(--c-text-2) !important;
  font-weight: 700 !important;
  font-size: 0.93rem !important;
  border: none !important;
  box-shadow: none !important;
  padding: 10px 14px !important;
  transition: background 0.14s !important;
}

/* Header hover: pale primary background, no visible border. */
.gradio-accordion summary:hover,
.gradio-accordion button:not(.gen-btn):hover {
  background: var(--c-primary-pale) !important;
  border-color: transparent !important;
}
|
| 450 |
+
|
| 451 |
+
/* ==========================================================
   Preview header
   ========================================================== */

/* Flex row above the preview, separated from it by a bottom rule. */
.preview-header {
  display: flex;
  justify-content: space-between;
  align-items: flex-start;
  gap: 8px;

  margin: 0 0 10px;
  padding-bottom: 10px;
  border-bottom: 1.5px solid var(--c-border);
}

/* Heading text of the preview. */
.preview-title {
  margin: 0;
  color: var(--c-text) !important;
  font-weight: 700;
  font-size: 1.05rem;
}

/* Muted sub-line under the heading. */
.preview-sub {
  margin: 3px 0 0;
  color: var(--c-text-muted) !important;
  font-size: 0.86rem;
}
|
| 476 |
+
|
| 477 |
+
/* ==========================================================
   Result markdown
   ========================================================== */

/* Container card for rendered markdown output. */
.result-markdown {
  min-height: 60px;
  padding: 14px 16px !important;
  background: var(--c-surface) !important;
  border: 1.5px solid var(--c-border) !important;
  border-radius: var(--r-md) !important;
  box-shadow: none !important;
}

/* Headings use the dark primary colour. */
.result-markdown h1,
.result-markdown h2,
.result-markdown h3 {
  color: var(--c-primary-dk) !important;
}

/* Body text. */
.result-markdown p,
.result-markdown li,
.result-markdown strong {
  color: var(--c-text) !important;
}

/* Links. */
.result-markdown a {
  color: var(--c-primary) !important;
}

/* Code spans: accent-coloured text on a secondary-surface chip. */
.result-markdown code {
  padding: 1px 5px !important;
  border-radius: 4px !important;
  background: var(--c-surface-2) !important;
  color: var(--c-accent) !important;
  font-size: 0.87em !important;
}

/* Code blocks. */
.result-markdown pre {
  border-radius: var(--r-md) !important;
  background: var(--c-surface-2) !important;
}
|
| 511 |
+
|
| 512 |
+
/* ==========================================================
   Tabs nav bar
   ========================================================== */

/* Tab container: tinted, bordered, rounded panel with a drop shadow. */
.tabs {
  padding: 10px 12px 12px !important;
  background: var(--c-surface-tint) !important;
  border: 1.5px solid var(--c-border-strong) !important;
  border-radius: var(--r-lg) !important;
  box-shadow: var(--shadow-panel) !important;
}

/* Strip holding the tab buttons, underlined by a 2px rule. */
.tabs > .tab-nav {
  gap: 2px !important;
  padding: 0 2px !important;
  background: transparent !important;
  border-bottom: 2px solid var(--c-border) !important;
}
|
| 529 |
+
|
| 530 |
+
/* ==========================================================
   Footer
   ========================================================== */

/* Centred, muted footer line separated by a top rule. */
.footer-text {
  margin-top: 14px;
  padding: 10px 0 6px;
  border-top: 1px solid var(--c-border);

  color: var(--c-text-muted) !important;
  font-size: 0.88rem;
  text-align: center;
}

/* Footer links: primary colour, underlined only on hover. */
.footer-text a {
  color: var(--c-primary) !important;
  font-weight: 600;
  text-decoration: none;
}

.footer-text a:hover {
  text-decoration: underline;
}
|
| 549 |
+
|
| 550 |
+
/* ==========================================================
   Responsive
   ========================================================== */

/* Viewports up to 860px wide: stack the layout and tighten spacing. */
@media (max-width: 860px) {
  /* Stack the main layout vertically. */
  .main-layout {
    flex-direction: column !important;
  }

  /* Both columns take the full width. */
  .control-stack,
  .preview-col {
    flex: 1 1 100% !important;
    min-width: 0 !important;
    padding: 12px 12px !important;
  }

  .gradio-container {
    padding: 6px 8px !important;
  }

  /* Header sizing for this breakpoint. */
  .header-row {
    gap: 8px !important;
    padding: 2px 0 6px !important;
  }

  .header-row img {
    height: 56px;
  }

  .header-title {
    font-size: 1.35rem;
  }

  .header-sub {
    font-size: 0.87rem;
  }

  /* Card and tab container padding for this breakpoint. */
  .control-card {
    padding: 10px 12px !important;
  }

  .tabs {
    padding: 8px 8px 10px !important;
  }
}
|