XQ committed on
Commit
12fcc8e
·
1 Parent(s): b098588

Update description

Browse files
Files changed (3) hide show
  1. .github/README.md +1 -1
  2. README.md +1 -1
  3. src/ui/app.py +11 -12
.github/README.md CHANGED
@@ -2,7 +2,7 @@
2
 
3
  **Live Demo:** [xq-dokumentassistent.hf.space](https://xq-dokumentassistent.hf.space) — hosted on Hugging Face Spaces
4
 
5
- A document assistant for Danish-language PDFs. Queries run through hybrid dense+BM25 search, cross-encoder reranking, and an LLM that cites the source passages in its answer.
6
 
7
  ## How it works
8
 
 
2
 
3
  **Live Demo:** [xq-dokumentassistent.hf.space](https://xq-dokumentassistent.hf.space) — hosted on Hugging Face Spaces
4
 
5
+ A document intelligence system covering PDF ingestion, semantic chunking, hybrid retrieval with reranking, and LLM-generated answers with source citations. The LLM layer is provider-agnostic. Two modes: a fixed pipeline for lightweight models, and a LangGraph ReAct agent for queries that need multiple retrieval steps. Retrieval quality is evaluated with RAGAS.
6
 
7
  ## How it works
8
 
README.md CHANGED
@@ -12,7 +12,7 @@ noindex: true
12
 
13
  **Live Demo:** [xq-dokumentassistent.hf.space](https://xq-dokumentassistent.hf.space) — hosted on Hugging Face Spaces
14
 
15
- A document assistant for Danish-language PDFs. Queries run through hybrid dense+BM25 search, cross-encoder reranking, and an LLM that cites the source passages in its answer.
16
 
17
  ## How it works
18
 
 
12
 
13
  **Live Demo:** [xq-dokumentassistent.hf.space](https://xq-dokumentassistent.hf.space) — hosted on Hugging Face Spaces
14
 
15
+ A document intelligence system covering PDF ingestion, semantic chunking, hybrid retrieval with reranking, and LLM-generated answers with source citations. The LLM layer is provider-agnostic. Two modes: a fixed pipeline for lightweight models, and a LangGraph ReAct agent for queries that need multiple retrieval steps. Retrieval quality is evaluated with RAGAS.
16
 
17
  ## How it works
18
 
src/ui/app.py CHANGED
@@ -41,8 +41,6 @@ TEXTS: dict[str, dict[str, str]] = {
41
  "lang_label": "Sprog",
42
  "sidebar_heading": "Om systemet",
43
  "sidebar_body": (
44
- "End-to-end RAG-prototype der gør dansksprog "
45
- "dokumenthåndtering selvbetjent.\n\n"
46
  "- **Python + FastAPI** REST-backend\n"
47
  "- **Ustruktureret data** — PDF-parsing, preprocessing, "
48
  "tre chunking-strategier\n"
@@ -64,10 +62,11 @@ TEXTS: dict[str, dict[str, str]] = {
64
  "title": "Dokumentassistent",
65
  "title_badge": "Demo",
66
  "subtitle": (
67
- "End-to-end RAG-prototype til dansksproglig dokumenthåndtering. "
68
- "Python · FastAPI · LangChain · LangGraph · Qdrant · "
69
- "Hybrid søgning (BM25 + semantisk) · Cross-encoder reranking · "
70
- "LLM-integration (provider-agnostisk) · RAGAS-evaluering · Streamlit"
 
71
  ),
72
  "search_label": "Stil et spørgsmål om ... ",
73
  "search_placeholder": "F.eks.: Hvad er reglerne for behandling af personoplysninger?",
@@ -118,8 +117,6 @@ TEXTS: dict[str, dict[str, str]] = {
118
  "lang_label": "Language",
119
  "sidebar_heading": "About the system",
120
  "sidebar_body": (
121
- "End-to-end RAG prototype that makes Danish-language "
122
- "document Q&A self-service.\n\n"
123
  "- **Python + FastAPI** REST backend\n"
124
  "- **Unstructured data** — PDF parsing, preprocessing, "
125
  "three chunking strategies\n"
@@ -141,10 +138,12 @@ TEXTS: dict[str, dict[str, str]] = {
141
  "title": "Document Assistant",
142
  "title_badge": "Demo",
143
  "subtitle": (
144
- "End-to-end RAG prototype for Danish-language document Q&A. "
145
- "Python · FastAPI · LangChain · LangGraph · Qdrant · "
146
- "Hybrid search (BM25 + semantic) · Cross-encoder reranking · "
147
- "LLM integration (provider-agnostic) · RAGAS evaluation · Streamlit"
 
 
148
  ),
149
  "search_label": "Ask a question ...",
150
  "search_placeholder": "E.g.: What are the rules for processing personal data?",
 
41
  "lang_label": "Sprog",
42
  "sidebar_heading": "Om systemet",
43
  "sidebar_body": (
 
 
44
  "- **Python + FastAPI** REST-backend\n"
45
  "- **Ustruktureret data** — PDF-parsing, preprocessing, "
46
  "tre chunking-strategier\n"
 
62
  "title": "Dokumentassistent",
63
  "title_badge": "Demo",
64
  "subtitle": (
65
+ "Et dokumentintelligens-system der dækker PDF-indlæsning, semantisk chunking, "
66
+ "hybrid søgning med reranking "
67
+ "og LLM-genererede svar med kildehenvisninger. LLM-laget er provider-agnostisk. "
68
+ "To tilstande: en fast pipeline til lette modeller og en LangGraph ReAct-agent "
69
+ "til forespørgsler der kræver flere søgetrin. Søgekvaliteten evalueres med RAGAS."
70
  ),
71
  "search_label": "Stil et spørgsmål om ... ",
72
  "search_placeholder": "F.eks.: Hvad er reglerne for behandling af personoplysninger?",
 
117
  "lang_label": "Language",
118
  "sidebar_heading": "About the system",
119
  "sidebar_body": (
 
 
120
  "- **Python + FastAPI** REST backend\n"
121
  "- **Unstructured data** — PDF parsing, preprocessing, "
122
  "three chunking strategies\n"
 
138
  "title": "Document Assistant",
139
  "title_badge": "Demo",
140
  "subtitle": (
141
+ "A document intelligence system covering PDF ingestion, semantic chunking, "
142
+ "hybrid retrieval with reranking, "
143
+ "and LLM-generated answers with source citations. The LLM layer is provider-agnostic. "
144
+ "Two modes: a fixed pipeline for lightweight models, a LangGraph ReAct agent "
145
+ "for queries that need multiple retrieval steps. "
146
+ "Retrieval quality is evaluated with RAGAS."
147
  ),
148
  "search_label": "Ask a question ...",
149
  "search_placeholder": "E.g.: What are the rules for processing personal data?",