Spaces:
Sleeping
Sleeping
Oleksii Obolonskyi commited on
Commit ·
6f19c35
1
Parent(s): 123d866
Persist FAISS indexes across restarts
Browse files
README.md
CHANGED
|
@@ -54,13 +54,12 @@ Set these environment variables (local dev or Hugging Face Spaces secrets):
|
|
| 54 |
|
| 55 |
```bash
|
| 56 |
export HF_TOKEN=hf_your_token_here
|
| 57 |
-
export RAG_HF_MODEL=
|
| 58 |
-
export
|
| 59 |
-
export
|
| 60 |
-
export RAG_LLM_BACKEND=hf
|
| 61 |
```
|
| 62 |
|
| 63 |
-
Optional: set `
|
| 64 |
|
| 65 |
### 3) Prepare sources
|
| 66 |
|
|
@@ -88,8 +87,8 @@ streamlit run app.py
|
|
| 88 |
```
|
| 89 |
|
| 90 |
Open `http://localhost:8501`. On first run, the app builds FAISS indexes:
|
| 91 |
-
- `data/
|
| 92 |
-
- `data/
|
| 93 |
|
| 94 |
## Configuration
|
| 95 |
|
|
@@ -98,16 +97,15 @@ You can override defaults via environment variables:
|
|
| 98 |
```bash
|
| 99 |
export RAG_BOOK_CHUNKS_PATH=data/normalized/chunks_books.jsonl
|
| 100 |
export RAG_ARTICLE_CHUNKS_PATH=data/normalized/chunks_articles.jsonl
|
| 101 |
-
export RAG_BOOK_INDEX_PATH=data/
|
| 102 |
-
export RAG_ARTICLE_INDEX_PATH=data/
|
| 103 |
export RAG_BOOK_MANIFEST_PATH=data/normalized/manifest_books.json
|
| 104 |
export RAG_ARTICLE_MANIFEST_PATH=data/normalized/manifest_articles.json
|
| 105 |
export RAG_EMBED_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
| 106 |
export HF_TOKEN=hf_your_token_here
|
| 107 |
-
export
|
| 108 |
-
export
|
| 109 |
-
export
|
| 110 |
-
export RAG_LLM_BACKEND=hf
|
| 111 |
export RAG_MAX_CONTEXT_TOKENS=6000
|
| 112 |
export RAG_INJECT_MAX_CHUNKS=6
|
| 113 |
export RAG_MAX_GENERATION_TOKENS=512
|
|
@@ -119,9 +117,14 @@ export RAG_ARTICLE_SOURCES=sources_articles.json
|
|
| 119 |
## Deploy to Hugging Face Spaces
|
| 120 |
|
| 121 |
1. Create a new Space (Streamlit SDK) and push this repo.
|
| 122 |
-
2.
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
## Common maintenance tasks
|
| 127 |
|
|
|
|
| 54 |
|
| 55 |
```bash
|
| 56 |
export HF_TOKEN=hf_your_token_here
|
| 57 |
+
export RAG_HF_MODEL=Qwen/Qwen2.5-7B-Instruct-1M:featherless-ai
|
| 58 |
+
export RAG_HF_PROVIDER_SUFFIX=featherless-ai
|
| 59 |
+
export RAG_LLM_BACKEND=hf-router
|
|
|
|
| 60 |
```
|
| 61 |
|
| 62 |
+
Optional: set `RAG_HF_PROVIDER_SUFFIX` if your model id is missing the provider suffix.
|
| 63 |
|
| 64 |
### 3) Prepare sources
|
| 65 |
|
|
|
|
| 87 |
```
|
| 88 |
|
| 89 |
Open `http://localhost:8501`. On first run, the app builds FAISS indexes:
|
| 90 |
+
- `data/cache/index_books.faiss` (local)
|
| 91 |
+
- `data/cache/index_articles.faiss` (local)
|
| 92 |
|
| 93 |
## Configuration
|
| 94 |
|
|
|
|
| 97 |
```bash
|
| 98 |
export RAG_BOOK_CHUNKS_PATH=data/normalized/chunks_books.jsonl
|
| 99 |
export RAG_ARTICLE_CHUNKS_PATH=data/normalized/chunks_articles.jsonl
|
| 100 |
+
export RAG_BOOK_INDEX_PATH=data/cache/index_books.faiss
|
| 101 |
+
export RAG_ARTICLE_INDEX_PATH=data/cache/index_articles.faiss
|
| 102 |
export RAG_BOOK_MANIFEST_PATH=data/normalized/manifest_books.json
|
| 103 |
export RAG_ARTICLE_MANIFEST_PATH=data/normalized/manifest_articles.json
|
| 104 |
export RAG_EMBED_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
| 105 |
export HF_TOKEN=hf_your_token_here
|
| 106 |
+
export RAG_HF_MODEL=Qwen/Qwen2.5-7B-Instruct-1M:featherless-ai
|
| 107 |
+
export RAG_HF_PROVIDER_SUFFIX=featherless-ai
|
| 108 |
+
export RAG_LLM_BACKEND=hf-router
|
|
|
|
| 109 |
export RAG_MAX_CONTEXT_TOKENS=6000
|
| 110 |
export RAG_INJECT_MAX_CHUNKS=6
|
| 111 |
export RAG_MAX_GENERATION_TOKENS=512
|
|
|
|
| 117 |
## Deploy to Hugging Face Spaces
|
| 118 |
|
| 119 |
1. Create a new Space (Streamlit SDK) and push this repo.
|
| 120 |
+
2. Enable Persistent Storage and set caches:
|
| 121 |
+
- `HF_HOME=/data/.huggingface`
|
| 122 |
+
- `SENTENCE_TRANSFORMERS_HOME=/data/.sentence-transformers`
|
| 123 |
+
3. In Space Settings → Secrets, set `HF_TOKEN` (required) and optionally `GITHUB_TOKEN`.
|
| 124 |
+
4. In Space Settings → Variables, set `RAG_HF_MODEL` and `RAG_LLM_BACKEND=hf-router`.
|
| 125 |
+
5. Optional: `RAG_HF_PROVIDER_SUFFIX`, `RAG_INJECT_MAX_CHUNKS`, and `RAG_RETRIEVE_TOPK_MULT`.
|
| 126 |
+
|
| 127 |
+
With persistent storage enabled, FAISS indexes are stored in `/data/rag_cache` and reused across restarts. They are rebuilt only when the normalized chunk/manifest files, the embedding model, or the index parameters change.
|
| 128 |
|
| 129 |
## Common maintenance tasks
|
| 130 |
|
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
|
|
|
| 4 |
import html
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from pathlib import Path
|
|
@@ -19,6 +20,16 @@ from sentence_transformers import SentenceTransformer
|
|
| 19 |
|
| 20 |
load_dotenv(Path(__file__).resolve().parent / ".env", override=True)
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
COMPANY_NAME = "O_O.inc"
|
| 23 |
COMPANY_EMAIL = "o.obolonsky@proton.me"
|
| 24 |
COMPANY_PHONE = "+380953555919"
|
|
@@ -49,8 +60,8 @@ CONFIG = AppConfig(
|
|
| 49 |
article_chunks_path=os.environ.get("RAG_ARTICLE_CHUNKS_PATH", "data/normalized/chunks_articles.jsonl"),
|
| 50 |
book_manifest_path=os.environ.get("RAG_BOOK_MANIFEST_PATH", "data/normalized/manifest_books.json"),
|
| 51 |
article_manifest_path=os.environ.get("RAG_ARTICLE_MANIFEST_PATH", "data/normalized/manifest_articles.json"),
|
| 52 |
-
book_index_path=os.environ.get("RAG_BOOK_INDEX_PATH", "
|
| 53 |
-
article_index_path=os.environ.get("RAG_ARTICLE_INDEX_PATH", "
|
| 54 |
embed_model=os.environ.get("RAG_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"),
|
| 55 |
max_context_tokens=int(os.getenv("RAG_MAX_CONTEXT_TOKENS", "6000")),
|
| 56 |
inject_max_chunks=int(os.getenv("RAG_INJECT_MAX_CHUNKS", os.getenv("RAG_MAX_CHUNKS", "6"))),
|
|
@@ -70,6 +81,8 @@ BOOK_MANIFEST_PATH = CONFIG.book_manifest_path
|
|
| 70 |
ARTICLE_MANIFEST_PATH = CONFIG.article_manifest_path
|
| 71 |
BOOK_INDEX_PATH = CONFIG.book_index_path
|
| 72 |
ARTICLE_INDEX_PATH = CONFIG.article_index_path
|
|
|
|
|
|
|
| 73 |
EMBED_MODEL = CONFIG.embed_model
|
| 74 |
MAX_CONTEXT_TOKENS = CONFIG.max_context_tokens
|
| 75 |
INJECT_MAX_CHUNKS = CONFIG.inject_max_chunks
|
|
@@ -82,8 +95,10 @@ PER_DOC_CAP = CONFIG.per_doc_cap
|
|
| 82 |
OVERLAP_FILTER = CONFIG.overlap_filter
|
| 83 |
RETRIEVE_TOPK_MULT = CONFIG.retrieve_topk_mult
|
| 84 |
|
|
|
|
|
|
|
|
|
|
| 85 |
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 86 |
-
HF_MODEL = os.getenv("RAG_HF_MODEL", "Qwen/Qwen2.5-7B-Instruct-1M:featherless-ai").strip()
|
| 87 |
|
| 88 |
OLLAMA_BASE_URL = os.environ.get("RAG_OLLAMA_URL", "http://localhost:11434").rstrip("/")
|
| 89 |
OLLAMA_MODEL = os.environ.get("RAG_OLLAMA_MODEL", "llama3.2:1b")
|
|
@@ -330,23 +345,99 @@ def build_faiss_index(vectors: np.ndarray) -> faiss.Index:
|
|
| 330 |
index.add(vectors)
|
| 331 |
return index
|
| 332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
def load_or_build_index(
|
|
|
|
| 334 |
chunks: List[Chunk],
|
| 335 |
embedder: SentenceTransformer,
|
|
|
|
|
|
|
| 336 |
index_path: str,
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
| 339 |
p = Path(index_path)
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
texts = [c.text for c in chunks]
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
vecs = np.asarray(vecs, dtype="float32")
|
| 346 |
index = build_faiss_index(vecs)
|
| 347 |
p.parent.mkdir(parents=True, exist_ok=True)
|
| 348 |
faiss.write_index(index, str(p))
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
def retrieve(query: str, embedder: SentenceTransformer, index: faiss.Index, chunks: List[Chunk], k: int = 8) -> List[Tuple[float, Chunk]]:
|
| 352 |
qv = embedder.encode([query], normalize_embeddings=True)
|
|
@@ -594,9 +685,16 @@ def answer_question(
|
|
| 594 |
"chunks_cap": INJECT_MAX_CHUNKS,
|
| 595 |
"context_cap": MAX_CONTEXT_TOKENS,
|
| 596 |
}
|
| 597 |
-
answer, err = llm_chat(prompt)
|
|
|
|
|
|
|
| 598 |
if err:
|
| 599 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
return f"Model error: {err}", citations, False
|
| 601 |
if not answer:
|
| 602 |
st.error("Empty response from model")
|
|
@@ -610,6 +708,34 @@ def system_message() -> str:
|
|
| 610 |
"Keep answers concise. Cite sources using the provided citation tags exactly."
|
| 611 |
)
|
| 612 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
def is_running_on_spaces() -> bool:
|
| 614 |
if os.environ.get("HF_SPACE_ID") or os.environ.get("SPACE_ID"):
|
| 615 |
return True
|
|
@@ -617,28 +743,27 @@ def is_running_on_spaces() -> bool:
|
|
| 617 |
|
| 618 |
@st.cache_resource(show_spinner=False)
|
| 619 |
def get_hf_router_client() -> OpenAI:
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
)
|
| 624 |
|
| 625 |
-
def
|
| 626 |
-
|
| 627 |
-
return "", "Missing HF_TOKEN (or HUGGINGFACEHUB_API_TOKEN)"
|
| 628 |
try:
|
| 629 |
client = get_hf_router_client()
|
| 630 |
completion = client.chat.completions.create(
|
| 631 |
-
model=
|
| 632 |
messages=[
|
| 633 |
-
{"role": "system", "content": "You are a helpful assistant."},
|
| 634 |
{"role": "user", "content": prompt},
|
| 635 |
],
|
| 636 |
max_tokens=MAX_GENERATION_TOKENS,
|
| 637 |
temperature=0.2,
|
| 638 |
)
|
| 639 |
-
return (completion.choices[0].message.content or "").strip(), None
|
| 640 |
except Exception as e:
|
| 641 |
-
return "", str(e)
|
| 642 |
|
| 643 |
def ollama_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
|
| 644 |
url = f"{OLLAMA_BASE_URL}/api/chat"
|
|
@@ -660,7 +785,7 @@ def ollama_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str,
|
|
| 660 |
except Exception as e:
|
| 661 |
return "", str(e)
|
| 662 |
|
| 663 |
-
def llm_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
|
| 664 |
"""
|
| 665 |
Routes generation to HF if configured; otherwise falls back to Ollama.
|
| 666 |
Prefer explicit env var if you want:
|
|
@@ -669,14 +794,16 @@ def llm_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Op
|
|
| 669 |
backend = (os.environ.get("RAG_LLM_BACKEND", "") or "").strip().lower()
|
| 670 |
|
| 671 |
if backend == "hf-router":
|
| 672 |
-
return
|
| 673 |
if backend == "ollama":
|
| 674 |
-
|
|
|
|
| 675 |
if is_running_on_spaces():
|
| 676 |
-
return
|
| 677 |
if (HF_TOKEN or "").strip():
|
| 678 |
-
return
|
| 679 |
-
|
|
|
|
| 680 |
|
| 681 |
def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
|
| 682 |
global _GITHUB_TOKEN_LOGGED
|
|
@@ -746,39 +873,6 @@ button[aria-label^="MCP •"]::before{content:"MCP";position:absolute;left:0.6re
|
|
| 746 |
|
| 747 |
if "is_thinking" not in st.session_state:
|
| 748 |
st.session_state["is_thinking"] = False
|
| 749 |
-
|
| 750 |
-
with st.sidebar:
|
| 751 |
-
st.markdown(f"**Company:** {COMPANY_NAME}")
|
| 752 |
-
st.markdown(f"**Contact:** {COMPANY_EMAIL} · {COMPANY_PHONE}")
|
| 753 |
-
st.caption(COMPANY_ABOUT)
|
| 754 |
-
st.write("")
|
| 755 |
-
st.subheader("Support")
|
| 756 |
-
st.caption("If an answer is not found in the dataset, you can create a support ticket (GitHub issue).")
|
| 757 |
-
st.session_state.setdefault("open_ticket_ui", False)
|
| 758 |
-
if st.button("Open ticket form", use_container_width=True, disabled=st.session_state["is_thinking"]):
|
| 759 |
-
st.session_state["open_ticket_ui"] = True
|
| 760 |
-
st.write("")
|
| 761 |
-
st.subheader("LLM")
|
| 762 |
-
st.markdown(f"- Active model: `{HF_MODEL}`")
|
| 763 |
-
st.write("")
|
| 764 |
-
st.subheader("Embedding model (retrieval)")
|
| 765 |
-
st.code(EMBED_MODEL)
|
| 766 |
-
st.write("")
|
| 767 |
-
st.subheader("Retrieval settings")
|
| 768 |
-
st.caption(f"book_k={BOOK_K}, article_k={ARTICLE_K}, per_doc_cap={PER_DOC_CAP}, overlap_filter={OVERLAP_FILTER}")
|
| 769 |
-
st.markdown("### Dataset Stats")
|
| 770 |
-
ts = st.session_state.get("token_stats")
|
| 771 |
-
if ts:
|
| 772 |
-
st.markdown("**Token Consumption (est.)**")
|
| 773 |
-
st.markdown(f"- Context tokens: `{ts['context_tokens']}` / `{ts['context_cap']}`")
|
| 774 |
-
st.markdown(f"- Chunks used: `{ts['chunks_used']}` / `{ts['chunks_cap']}`")
|
| 775 |
-
st.markdown(f"- Prompt tokens: `{ts['prompt_tokens']}`")
|
| 776 |
-
st.markdown(f"- Generation tokens (max): `{ts['generation_tokens']}`")
|
| 777 |
-
st.markdown(f"- **Total per request (est.):** `{ts['total_tokens']}`")
|
| 778 |
-
if ts["context_tokens"] >= int(0.9 * ts["context_cap"]):
|
| 779 |
-
st.warning("Context near token limit; answers may truncate.")
|
| 780 |
-
else:
|
| 781 |
-
st.markdown("_Ask a question to see token usage._")
|
| 782 |
@st.cache_data(show_spinner=False)
|
| 783 |
def load_dataset(path: str) -> List[Chunk]:
|
| 784 |
return read_chunks_jsonl(path)
|
|
@@ -811,8 +905,115 @@ doc_index = merge_doc_indexes(book_doc_index, article_doc_index)
|
|
| 811 |
book_stats = compute_stats(book_chunks, book_manifest, book_doc_index)
|
| 812 |
article_stats = compute_stats(article_chunks, article_manifest, article_doc_index)
|
| 813 |
embedder = load_embedder(EMBED_MODEL)
|
| 814 |
-
|
| 815 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
|
| 817 |
if "chat" not in st.session_state:
|
| 818 |
st.session_state["chat"] = []
|
|
@@ -854,42 +1055,6 @@ def parse_generated_questions(text: str) -> List[str]:
|
|
| 854 |
break
|
| 855 |
return cleaned
|
| 856 |
|
| 857 |
-
with st.sidebar:
|
| 858 |
-
st.write("")
|
| 859 |
-
st.markdown("**Books + MCP**")
|
| 860 |
-
st.write(f"Chunk length: min {book_stats['length_min']}, median {book_stats['length_median']}, max {book_stats['length_max']}")
|
| 861 |
-
st.write("")
|
| 862 |
-
st.markdown("**Articles**")
|
| 863 |
-
st.write(f"Chunk length: min {article_stats['length_min']}, median {article_stats['length_median']}, max {article_stats['length_max']}")
|
| 864 |
-
st.write("")
|
| 865 |
-
st.markdown("**By type (inferred)**")
|
| 866 |
-
for k in ["book", "mcp", "article"]:
|
| 867 |
-
total = 0
|
| 868 |
-
if k in book_stats["type_counts"]:
|
| 869 |
-
total += book_stats["type_counts"][k]
|
| 870 |
-
if k in article_stats["type_counts"]:
|
| 871 |
-
total += article_stats["type_counts"][k]
|
| 872 |
-
if total:
|
| 873 |
-
st.write(f"{k}: {total}")
|
| 874 |
-
st.write("")
|
| 875 |
-
st.session_state.setdefault("show_sources", False)
|
| 876 |
-
st.markdown('<div class="stacked-control sources-btn">', unsafe_allow_html=True)
|
| 877 |
-
if st.button("Sources (click to expand the list)", use_container_width=True, disabled=st.session_state["is_thinking"]):
|
| 878 |
-
st.session_state["show_sources"] = not st.session_state["show_sources"]
|
| 879 |
-
st.markdown("</div>", unsafe_allow_html=True)
|
| 880 |
-
if st.session_state["show_sources"]:
|
| 881 |
-
if book_stats["mcp_docs_count"]:
|
| 882 |
-
mcp_line = f"MCP: {book_stats['mcp_docs_count']} docs"
|
| 883 |
-
if book_stats["mcp_blocks_total"]:
|
| 884 |
-
mcp_line += f", {book_stats['mcp_blocks_total']} blocks"
|
| 885 |
-
st.write(mcp_line)
|
| 886 |
-
for line in book_stats["sources_lines"]:
|
| 887 |
-
st.write(line)
|
| 888 |
-
if article_stats["sources_lines"]:
|
| 889 |
-
st.write("")
|
| 890 |
-
st.markdown("**Article sources**")
|
| 891 |
-
for line in article_stats["sources_lines"]:
|
| 892 |
-
st.write(line)
|
| 893 |
|
| 894 |
def run_enhance(question: str, enhanced_key: str):
|
| 895 |
if not question or not enhanced_key:
|
|
@@ -925,9 +1090,16 @@ def run_regen():
|
|
| 925 |
"chunks_cap": INJECT_MAX_CHUNKS,
|
| 926 |
"context_cap": MAX_CONTEXT_TOKENS,
|
| 927 |
}
|
| 928 |
-
text, err = llm_chat(gen_prompt)
|
|
|
|
|
|
|
| 929 |
if err:
|
| 930 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
st.warning(f"LLM request failed: {err}")
|
| 932 |
return
|
| 933 |
if not text:
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
+
import hashlib
|
| 5 |
import html
|
| 6 |
from dataclasses import dataclass
|
| 7 |
from pathlib import Path
|
|
|
|
| 20 |
|
| 21 |
load_dotenv(Path(__file__).resolve().parent / ".env", override=True)
|
| 22 |
|
| 23 |
+
def get_persist_dir() -> str:
    """Return the directory used to cache FAISS indexes, creating it if needed.

    ``/data/rag_cache`` is preferred when ``/data`` exists and reports as
    writable; otherwise the relative ``data/cache`` directory is used.

    Returns:
        Path of an existing cache directory.

    Raises:
        OSError: If the local ``data/cache`` fallback cannot be created.
    """
    if os.path.isdir("/data") and os.access("/data", os.W_OK):
        persistent = "/data/rag_cache"
        try:
            os.makedirs(persistent, exist_ok=True)
        except OSError:
            # os.access() is only advisory; if the directory still cannot be
            # created, fall back to the local cache instead of failing at import.
            pass
        else:
            return persistent
    local = "data/cache"
    os.makedirs(local, exist_ok=True)
    return local
|
| 30 |
+
|
| 31 |
+
PERSIST_DIR = get_persist_dir()
|
| 32 |
+
|
| 33 |
COMPANY_NAME = "O_O.inc"
|
| 34 |
COMPANY_EMAIL = "o.obolonsky@proton.me"
|
| 35 |
COMPANY_PHONE = "+380953555919"
|
|
|
|
| 60 |
article_chunks_path=os.environ.get("RAG_ARTICLE_CHUNKS_PATH", "data/normalized/chunks_articles.jsonl"),
|
| 61 |
book_manifest_path=os.environ.get("RAG_BOOK_MANIFEST_PATH", "data/normalized/manifest_books.json"),
|
| 62 |
article_manifest_path=os.environ.get("RAG_ARTICLE_MANIFEST_PATH", "data/normalized/manifest_articles.json"),
|
| 63 |
+
book_index_path=os.environ.get("RAG_BOOK_INDEX_PATH", os.path.join(PERSIST_DIR, "index_books.faiss")),
|
| 64 |
+
article_index_path=os.environ.get("RAG_ARTICLE_INDEX_PATH", os.path.join(PERSIST_DIR, "index_articles.faiss")),
|
| 65 |
embed_model=os.environ.get("RAG_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"),
|
| 66 |
max_context_tokens=int(os.getenv("RAG_MAX_CONTEXT_TOKENS", "6000")),
|
| 67 |
inject_max_chunks=int(os.getenv("RAG_INJECT_MAX_CHUNKS", os.getenv("RAG_MAX_CHUNKS", "6"))),
|
|
|
|
| 81 |
ARTICLE_MANIFEST_PATH = CONFIG.article_manifest_path
|
| 82 |
BOOK_INDEX_PATH = CONFIG.book_index_path
|
| 83 |
ARTICLE_INDEX_PATH = CONFIG.article_index_path
|
| 84 |
+
BOOK_META_PATH = BOOK_INDEX_PATH + ".meta.json"
|
| 85 |
+
ARTICLE_META_PATH = ARTICLE_INDEX_PATH + ".meta.json"
|
| 86 |
EMBED_MODEL = CONFIG.embed_model
|
| 87 |
MAX_CONTEXT_TOKENS = CONFIG.max_context_tokens
|
| 88 |
INJECT_MAX_CHUNKS = CONFIG.inject_max_chunks
|
|
|
|
| 95 |
OVERLAP_FILTER = CONFIG.overlap_filter
|
| 96 |
RETRIEVE_TOPK_MULT = CONFIG.retrieve_topk_mult
|
| 97 |
|
| 98 |
+
HF_BASE_URL = "https://router.huggingface.co/v1"
|
| 99 |
+
HF_MODEL_RAW = os.getenv("RAG_HF_MODEL", "Qwen/Qwen2.5-7B-Instruct-1M").strip()
|
| 100 |
+
HF_MODEL_SUFFIX = os.getenv("RAG_HF_PROVIDER_SUFFIX", "").strip()
|
| 101 |
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
|
|
|
| 102 |
|
| 103 |
OLLAMA_BASE_URL = os.environ.get("RAG_OLLAMA_URL", "http://localhost:11434").rstrip("/")
|
| 104 |
OLLAMA_MODEL = os.environ.get("RAG_OLLAMA_MODEL", "llama3.2:1b")
|
|
|
|
| 345 |
index.add(vectors)
|
| 346 |
return index
|
| 347 |
|
| 348 |
+
def file_fingerprint(path: str) -> Optional[str]:
    """Return a cheap SHA-256 fingerprint of the file at *path*.

    The digest covers the file size, the modification time truncated to whole
    seconds, the first MiB of content and, for files larger than one MiB, the
    last MiB as well. Bytes in the middle of large files are not sampled.

    Args:
        path: File to fingerprint.

    Returns:
        Hex digest string, or ``None`` when the file cannot be stat'ed or read.
    """
    sample_size = 1024 * 1024
    try:
        stinfo = os.stat(path)
    except OSError:
        # Any stat failure (missing file, permission denied, a path component
        # that is not a directory) means "no fingerprint", matching the read
        # path below.
        return None
    h = hashlib.sha256()
    # NOTE(review): mtime is part of the digest, so re-creating identical files
    # (e.g. a fresh checkout) changes the fingerprint - confirm this is intended.
    h.update(f"{stinfo.st_size}:{int(stinfo.st_mtime)}".encode("utf-8"))
    try:
        with open(path, "rb") as f:
            h.update(f.read(sample_size))
            if stinfo.st_size > sample_size:
                f.seek(max(0, stinfo.st_size - sample_size))
                h.update(f.read(sample_size))
    except OSError:
        return None
    return h.hexdigest()
|
| 366 |
+
|
| 367 |
+
def compute_fingerprint(kind: str, embed_model: str, chunks_path: str, manifest_path: str, params: Dict) -> str:
    """Return a SHA-256 hex digest identifying one index configuration.

    The digest is taken over a key-sorted JSON document holding the index
    kind, the embedding model name, the file fingerprints of the chunk and
    manifest files, and the index parameters.
    """
    chunks_digest = file_fingerprint(chunks_path)
    manifest_digest = file_fingerprint(manifest_path)
    descriptor = dict(
        kind=kind,
        embed_model=embed_model,
        chunks_fp=chunks_digest,
        manifest_fp=manifest_digest,
        params=params,
    )
    # sort_keys keeps the serialisation independent of dict insertion order.
    serialized = json.dumps(descriptor, sort_keys=True)
    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()
|
| 377 |
+
|
| 378 |
+
def load_meta(path: str) -> Dict:
    """Load the JSON metadata sidecar stored at *path*.

    Args:
        path: Location of the ``.meta.json`` file.

    Returns:
        The decoded JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    try:
        loaded = json.loads(Path(path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: invalid JSON or
        # undecodable bytes. Either way the metadata is treated as absent.
        return {}
    # Valid JSON that is not an object (null, list, number) would break
    # callers that use ``meta.get(...)``, so treat it as absent too.
    return loaded if isinstance(loaded, dict) else {}
|
| 385 |
+
|
| 386 |
+
def save_meta(path: str, meta: Dict) -> None:
    """Write *meta* as indented, key-sorted JSON to *path*.

    The document is first written to ``<path>.tmp`` and then moved over the
    destination with ``os.replace``.
    """
    staging = Path(f"{path}.tmp")
    serialized = json.dumps(meta, indent=2, sort_keys=True)
    staging.write_text(serialized, encoding="utf-8")
    os.replace(staging, path)
|
| 390 |
+
|
| 391 |
def load_or_build_index(
    kind: str,
    chunks: List[Chunk],
    embedder: SentenceTransformer,
    chunks_path: str,
    manifest_path: str,
    index_path: str,
    meta_path: str,
    *,
    params: Optional[Dict] = None,
    fingerprint: Optional[str] = None,
) -> Tuple[faiss.Index, Dict]:
    """Load a cached FAISS index or build and persist a new one.

    The cached index at *index_path* is reused only when its metadata sidecar
    at *meta_path* records the expected fingerprint, the index file can be
    read, and it holds exactly one vector per chunk. Otherwise every chunk
    text is embedded, a new index is built and written, and fresh metadata is
    saved next to it.

    Args:
        kind: Label for the index (used in the spinner text and metadata).
        chunks: Chunks whose ``text`` is embedded, in index order.
        embedder: Model used to encode the chunk texts.
        chunks_path: Chunk file path; feeds the fingerprint and metadata.
        manifest_path: Manifest file path; feeds the fingerprint and metadata.
        index_path: Where the FAISS index is read from / written to.
        meta_path: Where the metadata JSON is read from / written to.
        params: Index parameters recorded in the fingerprint; derived from
            the embedder when omitted.
        fingerprint: Precomputed fingerprint; computed when omitted.

    Returns:
        ``(index, meta)`` for either the cached or the newly built index.
    """
    p = Path(index_path)
    if params is None:
        params = {
            "normalize_embeddings": True,
            "dim": getattr(embedder, "get_sentence_embedding_dimension", lambda: None)(),
            "engine": "faiss",
        }
    if fingerprint is None:
        fingerprint = compute_fingerprint(kind, EMBED_MODEL, chunks_path, manifest_path, params)
    if p.exists() and p.stat().st_size > 0 and Path(meta_path).exists():
        meta = load_meta(meta_path)
        if isinstance(meta, dict) and meta.get("fingerprint") == fingerprint:
            try:
                cached = faiss.read_index(str(p))
            except Exception:
                # A truncated or corrupt index file must not take the app
                # down; fall through and rebuild it.
                cached = None
            # Retrieval maps index rows back to ``chunks`` by position, so an
            # index of a different size cannot be used safely.
            if cached is not None and cached.ntotal == len(chunks):
                return cached, meta

    texts = [c.text for c in chunks]
    show_progress = os.getenv("RAG_SHOW_EMBED_PROGRESS", "0") == "1"
    with st.spinner(f"Building {kind} retrieval index (first run or dataset changed)..."):
        vecs = embedder.encode(
            texts,
            batch_size=32,
            show_progress_bar=show_progress,
            normalize_embeddings=True,
        )
        vecs = np.asarray(vecs, dtype="float32")
        index = build_faiss_index(vecs)
        p.parent.mkdir(parents=True, exist_ok=True)
        faiss.write_index(index, str(p))
        meta = {
            "fingerprint": fingerprint,
            "kind": kind,
            "embed_model": EMBED_MODEL,
            "chunks_path": chunks_path,
            "manifest_path": manifest_path,
            "params": params,
            "built_at": datetime.now(timezone.utc).isoformat(),
        }
        # Metadata is written last so a crash during the build leaves a
        # fingerprint mismatch and forces a rebuild next time.
        save_meta(meta_path, meta)
    return index, meta
|
| 441 |
|
| 442 |
def retrieve(query: str, embedder: SentenceTransformer, index: faiss.Index, chunks: List[Chunk], k: int = 8) -> List[Tuple[float, Chunk]]:
|
| 443 |
qv = embedder.encode([query], normalize_embeddings=True)
|
|
|
|
| 685 |
"chunks_cap": INJECT_MAX_CHUNKS,
|
| 686 |
"context_cap": MAX_CONTEXT_TOKENS,
|
| 687 |
}
|
| 688 |
+
answer, err, meta = llm_chat(prompt)
|
| 689 |
+
if meta and meta.get("model"):
|
| 690 |
+
st.session_state["active_model"] = meta["model"]
|
| 691 |
if err:
|
| 692 |
+
if is_model_not_supported(err):
|
| 693 |
+
render_model_recommendations()
|
| 694 |
+
with st.expander("Model error details"):
|
| 695 |
+
st.code(err)
|
| 696 |
+
else:
|
| 697 |
+
st.error(err)
|
| 698 |
return f"Model error: {err}", citations, False
|
| 699 |
if not answer:
|
| 700 |
st.error("Empty response from model")
|
|
|
|
| 708 |
"Keep answers concise. Cite sources using the provided citation tags exactly."
|
| 709 |
)
|
| 710 |
|
| 711 |
+
def get_effective_hf_model() -> str:
    """Return the model id to send to the HF router.

    The configured provider suffix is appended as ``model:suffix`` only when
    a suffix is set and the raw model id does not already contain a colon.
    """
    needs_suffix = bool(HF_MODEL_SUFFIX) and ":" not in HF_MODEL_RAW
    return f"{HF_MODEL_RAW}:{HF_MODEL_SUFFIX}" if needs_suffix else HF_MODEL_RAW
|
| 715 |
+
|
| 716 |
+
RECOMMENDED_MODELS = [
|
| 717 |
+
"Qwen/Qwen2.5-7B-Instruct-1M:featherless-ai",
|
| 718 |
+
"Qwen/Qwen2.5-7B-Instruct:featherless-ai",
|
| 719 |
+
"mistralai/Mistral-7B-Instruct-v0.3",
|
| 720 |
+
"HuggingFaceTB/SmolLM3-3B",
|
| 721 |
+
"google/gemma-2-9b-it",
|
| 722 |
+
]
|
| 723 |
+
|
| 724 |
+
def is_model_not_supported(err: str) -> bool:
    """Return True when *err* looks like an HF router "model not supported" error.

    Matching is case-insensitive; a falsy *err* is treated as an empty string.
    """
    lowered = (err or "").lower()
    markers = (
        "model_not_supported",
        "not supported by any provider you have enabled",
    )
    return any(marker in lowered for marker in markers)
|
| 727 |
+
|
| 728 |
+
def render_model_recommendations() -> None:
    """Render guidance for fixing an unsupported-model error from the HF router.

    Shows an error banner, the list of fix options, every id in
    ``RECOMMENDED_MODELS`` as a code block, and a closing configuration hint.
    """
    fix_options = (
        "- Use the provider-suffixed model id shown on the model page (e.g. `...:featherless-ai`).",
        "- Or enable additional Inference Providers in your HF account settings.",
        "- Or switch to a model that is served by a provider you have enabled.",
    )
    st.error("HF Router: model is not supported by your enabled providers.")
    st.markdown("**Fix options:**")
    for option in fix_options:
        st.markdown(option)
    st.markdown("**Try one of these model IDs:**")
    for model_id in RECOMMENDED_MODELS:
        st.code(model_id)
    st.markdown("Set `RAG_HF_MODEL` to one of the above, or set `RAG_HF_PROVIDER_SUFFIX=featherless-ai` for Qwen.")
|
| 738 |
+
|
| 739 |
def is_running_on_spaces() -> bool:
|
| 740 |
if os.environ.get("HF_SPACE_ID") or os.environ.get("SPACE_ID"):
|
| 741 |
return True
|
|
|
|
| 743 |
|
| 744 |
@st.cache_resource(show_spinner=False)
def get_hf_router_client() -> OpenAI:
    """Return an OpenAI-compatible client pointed at the HF router.

    The token is read from ``HF_TOKEN`` and then ``HUGGINGFACEHUB_API_TOKEN``;
    the first value that is non-empty after stripping whitespace is used.

    Raises:
        RuntimeError: If neither variable holds a non-blank token.
    """
    token = ""
    for name in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
        # Strip so that a whitespace-only or newline-padded secret is not
        # passed on as an API key.
        token = (os.getenv(name) or "").strip()
        if token:
            break
    if not token:
        raise RuntimeError("HF_TOKEN is not set. Add it as a Hugging Face Secret.")
    return OpenAI(base_url=HF_BASE_URL, api_key=token)
|
| 750 |
|
| 751 |
+
def hf_router_chat(prompt: str) -> Tuple[str, Optional[str], Optional[Dict[str, str]]]:
    """Send *prompt* to the HF router and return ``(text, error, meta)``.

    On success ``text`` is the stripped reply and ``error`` is ``None``; on
    any exception ``text`` is empty and ``error`` is the exception message.
    ``meta`` always carries the model id that was requested.
    """
    model_id = get_effective_hf_model()
    info = {"model": model_id}
    conversation = [
        {"role": "system", "content": "You are a helpful assistant. Follow the instructions and use provided context only when required."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = get_hf_router_client().chat.completions.create(
            model=model_id,
            messages=conversation,
            max_tokens=MAX_GENERATION_TOKENS,
            temperature=0.2,
        )
        reply = completion.choices[0].message.content or ""
        return reply.strip(), None, info
    except Exception as exc:
        # Failures are reported to the caller as an error string, not raised.
        return "", str(exc), info
|
| 767 |
|
| 768 |
def ollama_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
|
| 769 |
url = f"{OLLAMA_BASE_URL}/api/chat"
|
|
|
|
| 785 |
except Exception as e:
|
| 786 |
return "", str(e)
|
| 787 |
|
| 788 |
+
def llm_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str], Optional[Dict[str, str]]]:
|
| 789 |
"""
|
| 790 |
Routes generation to HF if configured; otherwise falls back to Ollama.
|
| 791 |
Prefer explicit env var if you want:
|
|
|
|
| 794 |
backend = (os.environ.get("RAG_LLM_BACKEND", "") or "").strip().lower()
|
| 795 |
|
| 796 |
if backend == "hf-router":
|
| 797 |
+
return hf_router_chat(prompt)
|
| 798 |
if backend == "ollama":
|
| 799 |
+
text, err = ollama_chat(prompt)
|
| 800 |
+
return text, err, None
|
| 801 |
if is_running_on_spaces():
|
| 802 |
+
return hf_router_chat(prompt)
|
| 803 |
if (HF_TOKEN or "").strip():
|
| 804 |
+
return hf_router_chat(prompt)
|
| 805 |
+
text, err = ollama_chat(prompt)
|
| 806 |
+
return text, err, None
|
| 807 |
|
| 808 |
def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
|
| 809 |
global _GITHUB_TOKEN_LOGGED
|
|
|
|
| 873 |
|
| 874 |
if "is_thinking" not in st.session_state:
|
| 875 |
st.session_state["is_thinking"] = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 876 |
@st.cache_data(show_spinner=False)
|
| 877 |
def load_dataset(path: str) -> List[Chunk]:
|
| 878 |
return read_chunks_jsonl(path)
|
|
|
|
| 905 |
book_stats = compute_stats(book_chunks, book_manifest, book_doc_index)
|
| 906 |
article_stats = compute_stats(article_chunks, article_manifest, article_doc_index)
|
| 907 |
embedder = load_embedder(EMBED_MODEL)
|
| 908 |
+
|
| 909 |
+
@st.cache_resource(show_spinner=False)
def get_indexes(book_fp: str, article_fp: str) -> Tuple[faiss.Index, faiss.Index]:
    """Return the ``(book_index, article_index)`` pair for the given fingerprints.

    Each index is loaded or built via ``load_or_build_index`` using the
    module-level chunk lists, embedder and configured paths. The books index
    is resolved first, then the articles index.
    """
    dim_getter = getattr(embedder, "get_sentence_embedding_dimension", lambda: None)
    shared_params = {
        "normalize_embeddings": True,
        "dim": dim_getter(),
        "engine": "faiss",
    }
    books, _books_meta = load_or_build_index(
        "books", book_chunks, embedder,
        BOOK_CHUNKS_PATH, BOOK_MANIFEST_PATH, BOOK_INDEX_PATH, BOOK_META_PATH,
        params=shared_params, fingerprint=book_fp,
    )
    articles, _articles_meta = load_or_build_index(
        "articles", article_chunks, embedder,
        ARTICLE_CHUNKS_PATH, ARTICLE_MANIFEST_PATH, ARTICLE_INDEX_PATH, ARTICLE_META_PATH,
        params=shared_params, fingerprint=article_fp,
    )
    return books, articles
|
| 939 |
+
|
| 940 |
+
index_params = {
|
| 941 |
+
"normalize_embeddings": True,
|
| 942 |
+
"dim": getattr(embedder, "get_sentence_embedding_dimension", lambda: None)(),
|
| 943 |
+
"engine": "faiss",
|
| 944 |
+
}
|
| 945 |
+
book_fp = compute_fingerprint("books", EMBED_MODEL, BOOK_CHUNKS_PATH, BOOK_MANIFEST_PATH, index_params)
|
| 946 |
+
article_fp = compute_fingerprint("articles", EMBED_MODEL, ARTICLE_CHUNKS_PATH, ARTICLE_MANIFEST_PATH, index_params)
|
| 947 |
+
book_index, article_index = get_indexes(book_fp, article_fp)
|
| 948 |
+
|
| 949 |
+
with st.sidebar:
|
| 950 |
+
st.markdown(f"**Company:** {COMPANY_NAME}")
|
| 951 |
+
st.markdown(f"**Contact:** {COMPANY_EMAIL} · {COMPANY_PHONE}")
|
| 952 |
+
st.caption(COMPANY_ABOUT)
|
| 953 |
+
st.write("")
|
| 954 |
+
st.subheader("Support")
|
| 955 |
+
st.caption("If an answer is not found in the dataset, you can create a support ticket (GitHub issue).")
|
| 956 |
+
st.session_state.setdefault("open_ticket_ui", False)
|
| 957 |
+
if st.button("Open ticket form", use_container_width=True, disabled=st.session_state["is_thinking"]):
|
| 958 |
+
st.session_state["open_ticket_ui"] = True
|
| 959 |
+
st.write("")
|
| 960 |
+
st.subheader("LLM")
|
| 961 |
+
st.markdown(f"- Active model: `{st.session_state.get('active_model', get_effective_hf_model())}`")
|
| 962 |
+
st.write("")
|
| 963 |
+
st.subheader("Embedding model (retrieval)")
|
| 964 |
+
st.code(EMBED_MODEL)
|
| 965 |
+
st.write("")
|
| 966 |
+
st.subheader("Retrieval settings")
|
| 967 |
+
st.caption(f"book_k={BOOK_K}, article_k={ARTICLE_K}, per_doc_cap={PER_DOC_CAP}, overlap_filter={OVERLAP_FILTER}")
|
| 968 |
+
st.markdown("### Dataset Stats")
|
| 969 |
+
st.write("")
|
| 970 |
+
st.markdown("**Books + MCP**")
|
| 971 |
+
st.write(f"Chunk length: min {book_stats['length_min']}, median {book_stats['length_median']}, max {book_stats['length_max']}")
|
| 972 |
+
st.write("")
|
| 973 |
+
st.markdown("**Articles**")
|
| 974 |
+
st.write(f"Chunk length: min {article_stats['length_min']}, median {article_stats['length_median']}, max {article_stats['length_max']}")
|
| 975 |
+
st.write("")
|
| 976 |
+
st.markdown("**By type (inferred)**")
|
| 977 |
+
for k in ["book", "mcp", "article"]:
|
| 978 |
+
total = 0
|
| 979 |
+
if k in book_stats["type_counts"]:
|
| 980 |
+
total += book_stats["type_counts"][k]
|
| 981 |
+
if k in article_stats["type_counts"]:
|
| 982 |
+
total += article_stats["type_counts"][k]
|
| 983 |
+
if total:
|
| 984 |
+
st.write(f"{k}: {total}")
|
| 985 |
+
st.write("")
|
| 986 |
+
ts = st.session_state.get("token_stats")
|
| 987 |
+
if ts:
|
| 988 |
+
st.markdown("**Token Consumption (est.)**")
|
| 989 |
+
st.markdown(f"- Context tokens: `{ts['context_tokens']}` / `{ts['context_cap']}`")
|
| 990 |
+
st.markdown(f"- Chunks used: `{ts['chunks_used']}` / `{ts['chunks_cap']}`")
|
| 991 |
+
st.markdown(f"- Prompt tokens: `{ts['prompt_tokens']}`")
|
| 992 |
+
st.markdown(f"- Generation tokens (max): `{ts['generation_tokens']}`")
|
| 993 |
+
st.markdown(f"- **Total per request (est.):** `{ts['total_tokens']}`")
|
| 994 |
+
if ts["context_tokens"] >= int(0.9 * ts["context_cap"]):
|
| 995 |
+
st.warning("Context near token limit; answers may truncate.")
|
| 996 |
+
else:
|
| 997 |
+
st.markdown("_Ask a question to see token usage._")
|
| 998 |
+
st.write("")
|
| 999 |
+
st.session_state.setdefault("show_sources", False)
|
| 1000 |
+
st.markdown('<div class="stacked-control sources-btn">', unsafe_allow_html=True)
|
| 1001 |
+
if st.button("Sources (click to expand the list)", use_container_width=True, disabled=st.session_state["is_thinking"]):
|
| 1002 |
+
st.session_state["show_sources"] = not st.session_state["show_sources"]
|
| 1003 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 1004 |
+
if st.session_state["show_sources"]:
|
| 1005 |
+
if book_stats["mcp_docs_count"]:
|
| 1006 |
+
mcp_line = f"MCP: {book_stats['mcp_docs_count']} docs"
|
| 1007 |
+
if book_stats["mcp_blocks_total"]:
|
| 1008 |
+
mcp_line += f", {book_stats['mcp_blocks_total']} blocks"
|
| 1009 |
+
st.write(mcp_line)
|
| 1010 |
+
for line in book_stats["sources_lines"]:
|
| 1011 |
+
st.write(line)
|
| 1012 |
+
if article_stats["sources_lines"]:
|
| 1013 |
+
st.write("")
|
| 1014 |
+
st.markdown("**Article sources**")
|
| 1015 |
+
for line in article_stats["sources_lines"]:
|
| 1016 |
+
st.write(line)
|
| 1017 |
|
| 1018 |
if "chat" not in st.session_state:
|
| 1019 |
st.session_state["chat"] = []
|
|
|
|
| 1055 |
break
|
| 1056 |
return cleaned
|
| 1057 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1058 |
|
| 1059 |
def run_enhance(question: str, enhanced_key: str):
|
| 1060 |
if not question or not enhanced_key:
|
|
|
|
| 1090 |
"chunks_cap": INJECT_MAX_CHUNKS,
|
| 1091 |
"context_cap": MAX_CONTEXT_TOKENS,
|
| 1092 |
}
|
| 1093 |
+
text, err, meta = llm_chat(gen_prompt)
|
| 1094 |
+
if meta and meta.get("model"):
|
| 1095 |
+
st.session_state["active_model"] = meta["model"]
|
| 1096 |
if err:
|
| 1097 |
+
if is_model_not_supported(err):
|
| 1098 |
+
render_model_recommendations()
|
| 1099 |
+
with st.expander("Model error details"):
|
| 1100 |
+
st.code(err)
|
| 1101 |
+
else:
|
| 1102 |
+
st.error(err)
|
| 1103 |
st.warning(f"LLM request failed: {err}")
|
| 1104 |
return
|
| 1105 |
if not text:
|