Spaces:
Running
Running
XQ committed on
Commit ·
5ab78ea
1
Parent(s): 62a41bb
Update descriptions
Browse files
- .github/README.md +1 -1
- README.md +1 -1
- scripts/ingest.py +1 -1
- src/ui/app.py +4 -4
.github/README.md
CHANGED
|
@@ -159,5 +159,5 @@ scripts/
|
|
| 159 |
ingest.py
|
| 160 |
e2e_test.py
|
| 161 |
tests/
|
| 162 |
-
docs/ # example PDFs (KU AI public documents)
|
| 163 |
```
|
|
|
|
| 159 |
ingest.py
|
| 160 |
e2e_test.py
|
| 161 |
tests/
|
| 162 |
+
docs/ # example PDFs/texts (KU AI public documents)
|
| 163 |
```
|
README.md
CHANGED
|
@@ -159,5 +159,5 @@ scripts/
|
|
| 159 |
ingest.py
|
| 160 |
e2e_test.py
|
| 161 |
tests/
|
| 162 |
-
docs/ # example PDFs (KU AI public documents)
|
| 163 |
```
|
|
|
|
| 159 |
ingest.py
|
| 160 |
e2e_test.py
|
| 161 |
tests/
|
| 162 |
+
docs/ # example PDFs or texts (KU AI public documents)
|
| 163 |
```
|
scripts/ingest.py
CHANGED
|
@@ -64,7 +64,7 @@ def main() -> None:
|
|
| 64 |
strategy_value = args.strategy or "recursive"
|
| 65 |
strategy = ChunkStrategy(strategy_value)
|
| 66 |
|
| 67 |
-
logger.info("===
|
| 68 |
logger.info("Docs directory : %s", docs_dir)
|
| 69 |
logger.info("Chunk strategy : %s", strategy.value)
|
| 70 |
logger.info("Chunk size : %d", settings.chunk_size)
|
|
|
|
| 64 |
strategy_value = args.strategy or "recursive"
|
| 65 |
strategy = ChunkStrategy(strategy_value)
|
| 66 |
|
| 67 |
+
logger.info("=== Doc Assistant — Ingestion ===")
|
| 68 |
logger.info("Docs directory : %s", docs_dir)
|
| 69 |
logger.info("Chunk strategy : %s", strategy.value)
|
| 70 |
logger.info("Chunk size : %d", settings.chunk_size)
|
src/ui/app.py
CHANGED
|
@@ -41,7 +41,7 @@ TEXTS: dict[str, dict[str, str]] = {
|
|
| 41 |
"sidebar_heading": "Om systemet",
|
| 42 |
"sidebar_body": (
|
| 43 |
"- **Python + FastAPI** REST-backend\n"
|
| 44 |
-
"- **Ustruktureret data** —
|
| 45 |
"tre chunking-strategier\n"
|
| 46 |
"- **Embedding-modeller** — flersproget semantisk "
|
| 47 |
"vektorrepræsentation\n"
|
|
@@ -63,7 +63,7 @@ TEXTS: dict[str, dict[str, str]] = {
|
|
| 63 |
"title": "Dokumentassistent",
|
| 64 |
"title_badge": "Demo",
|
| 65 |
"subtitle": (
|
| 66 |
-
"Et dokumentintelligens-system bygget på en RAG-arkitektur, dækkende
|
| 67 |
"hybrid søgning med reranking "
|
| 68 |
"og LLM-genererede svar med kildehenvisninger. LLM-laget er provider-agnostisk. "
|
| 69 |
"To tilstande: en LangGraph ReAct-agent (standard) til forespørgsler der kræver flere søgetrin, "
|
|
@@ -119,7 +119,7 @@ TEXTS: dict[str, dict[str, str]] = {
|
|
| 119 |
"sidebar_heading": "About the system",
|
| 120 |
"sidebar_body": (
|
| 121 |
"- **Python + FastAPI** REST backend\n"
|
| 122 |
-
"- **Unstructured data** —
|
| 123 |
"three chunking strategies\n"
|
| 124 |
"- **Embedding models** — multilingual semantic vector "
|
| 125 |
"representations\n"
|
|
@@ -141,7 +141,7 @@ TEXTS: dict[str, dict[str, str]] = {
|
|
| 141 |
"title": "Document Assistant",
|
| 142 |
"title_badge": "Demo",
|
| 143 |
"subtitle": (
|
| 144 |
-
"A document intelligence system built on a RAG architecture, covering
|
| 145 |
"hybrid retrieval with reranking, "
|
| 146 |
"and LLM-generated answers with source citations. The LLM layer is provider-agnostic. "
|
| 147 |
"Two modes: a LangGraph ReAct agent (default) for queries that need multiple retrieval steps, "
|
|
|
|
| 41 |
"sidebar_heading": "Om systemet",
|
| 42 |
"sidebar_body": (
|
| 43 |
"- **Python + FastAPI** REST-backend\n"
|
| 44 |
+
"- **Ustruktureret data** — File-parsing, preprocessing, "
|
| 45 |
"tre chunking-strategier\n"
|
| 46 |
"- **Embedding-modeller** — flersproget semantisk "
|
| 47 |
"vektorrepræsentation\n"
|
|
|
|
| 63 |
"title": "Dokumentassistent",
|
| 64 |
"title_badge": "Demo",
|
| 65 |
"subtitle": (
|
| 66 |
+
"Et dokumentintelligens-system bygget på en RAG-arkitektur, dækkende file-indlæsning, semantisk chunking, "
|
| 67 |
"hybrid søgning med reranking "
|
| 68 |
"og LLM-genererede svar med kildehenvisninger. LLM-laget er provider-agnostisk. "
|
| 69 |
"To tilstande: en LangGraph ReAct-agent (standard) til forespørgsler der kræver flere søgetrin, "
|
|
|
|
| 119 |
"sidebar_heading": "About the system",
|
| 120 |
"sidebar_body": (
|
| 121 |
"- **Python + FastAPI** REST backend\n"
|
| 122 |
+
"- **Unstructured data** — File parsing, preprocessing, "
|
| 123 |
"three chunking strategies\n"
|
| 124 |
"- **Embedding models** — multilingual semantic vector "
|
| 125 |
"representations\n"
|
|
|
|
| 141 |
"title": "Document Assistant",
|
| 142 |
"title_badge": "Demo",
|
| 143 |
"subtitle": (
|
| 144 |
+
"A document intelligence system built on a RAG architecture, covering file ingestion, semantic chunking, "
|
| 145 |
"hybrid retrieval with reranking, "
|
| 146 |
"and LLM-generated answers with source citations. The LLM layer is provider-agnostic. "
|
| 147 |
"Two modes: a LangGraph ReAct agent (default) for queries that need multiple retrieval steps, "
|