ArthurSrz committed on
Commit
7919734
·
verified ·
1 Parent(s): ec58083

feat: initial enirtcod.fr MVP deployment

Browse files
Files changed (7) hide show
  1. README.md +31 -7
  2. app.py +199 -0
  3. data_loader.py +83 -0
  4. requirements.txt +6 -0
  5. search.py +146 -0
  6. synthesis.py +97 -0
  7. ui_components.py +186 -0
README.md CHANGED
@@ -1,12 +1,36 @@
1
  ---
2
- title: Enirtcod
3
- emoji:
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.7.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: enirtcod
3
+ emoji: ⚖️
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: "4.44.0"
8
  app_file: app.py
9
+ pinned: true
10
+ license: apache-2.0
11
  ---
12
 
13
+ # enirtcod.fr — Recherche juridique française ouverte
14
+
15
+ **enirtcod** (« doctrine » à l'envers) est une alternative open-source à Doctrine.fr.
16
+
17
+ Interrogez en une seule recherche :
18
+ - 📖 **Articles de loi** — tous les codes français (Code civil, Code du travail, etc.)
19
+ - ⚖️ **Jurisprudence** — décisions de la Cour de cassation (Judilibre)
20
+ - 📋 **Circulaires** — instructions ministérielles officielles
21
+ - 💬 **Réponses ministérielles** — questions-réponses parlementaires
22
+
23
+ ## Fonctionnalités
24
+
25
+ - **Recherche sémantique 4 sources** via FAISS + embeddings Mistral
26
+ - **Synthèse LLM** avec citations juridiques françaises (`[Code civil, art. 1240]`, `[Cass. 1re civ., 13 avr. 2023, n° 21-20.145]`)
27
+ - **Filtres** par date, juridiction, code, ministère
28
+ - **Renvois croisés** : décisions citant un article
29
+
30
+ ## Données
31
+
32
+ Dataset : [`ArthurSrz/open_codes`](https://huggingface.co/datasets/ArthurSrz/open_codes) — licence Etalab 2.0
33
+
34
+ ## Stack technique
35
+
36
+ - Gradio 4.x · FAISS · Mistral AI (`mistral-embed` + `Mistral-7B-Instruct`) · HuggingFace Inference API
app.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py — enirtcod.fr Gradio HF Space entry point.
3
+
4
+ Startup sequence:
5
+ 1. load_all_datasets() — loads 4 FAISS indexes into RAM (~60-90s cold start)
6
+ 2. Gradio Blocks layout with search bar, source selector, filter panel,
7
+ synthesis panel, and tabbed result cards.
8
+ """
9
+
10
+ import os
11
+ import gradio as gr
12
+
13
+ from data_loader import load_all_datasets, embed_query, LOADING_STATUS
14
+ from search import search_all, find_related_decisions
15
+ from synthesis import synthesize
16
+ from ui_components import build_tabs_html, build_article_card
17
+
18
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# ---------------------------------------------------------------------------
# Cold start — runs once at module import (Space startup)
# ---------------------------------------------------------------------------
print("[app] Starting dataset loading… (may take up to 90s)")
DATASETS = load_all_datasets()
print(f"[app] Loading complete. Status: {LOADING_STATUS}")


def _distinct_values(source_key: str, column: str) -> list:
    """
    Return the sorted distinct non-empty values of *column* in a loaded source.

    Used to populate the filter dropdowns. Returns an empty list when the
    source is missing/empty or the column cannot be read.
    """
    ds = DATASETS.get(source_key)
    if not ds:
        return []
    try:
        # Empty/None values are skipped: a None mixed with strings would make
        # sorted() raise TypeError and empty the whole dropdown.
        return sorted({value for value in ds[column] if value})
    except Exception as exc:
        # Best effort: a broken column must not prevent the app from starting,
        # but the failure is reported instead of being silently swallowed.
        print(f"[app] Could not build '{column}' filter choices: {exc}")
        return []


# Populate filter dropdowns dynamically from loaded datasets
_code_names = _distinct_values("articles", "code_name")
_ministeres = _distinct_values("circulaires", "ministere")
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Search handler
45
+ # ---------------------------------------------------------------------------
46
def run_search(query: str, source_filter: str, date_from: int, date_to: int,
               jurisdiction: str, code_name: str, ministere: str):
    """
    Handle one search request coming from the UI.

    Embeds the query, searches the loaded datasets, attaches related decisions
    to the article hits, asks the LLM for a synthesis and renders everything
    as HTML.

    Returns a pair of ``gr.update`` objects: (synthesis panel, result tabs).
    """
    import html  # function-scope import: used to escape text placed in HTML

    # Bounds of the "Année depuis" / "Année jusqu'à" sliders (keep in sync with
    # the layout). A slider left on its extreme means "no bound": otherwise the
    # default values would silently exclude every text dated before 2000.
    slider_min_year, slider_max_year = 2000, 2026

    # Empty query guard (also covers a None value)
    if not query or not query.strip():
        return (
            gr.update(value="<p style='color:#9ca3af;font-style:italic'>Veuillez entrer une question juridique.</p>"),
            gr.update(value=""),
        )

    # Query length guard
    warning_note = ""
    if len(query) > 500:
        query = query[:500]
        warning_note = "\n\n⚠️ *Requête tronquée à 500 caractères.*"

    try:
        embedding = embed_query(query, HF_TOKEN)
    except ValueError as e:
        # The message may embed raw API error text: escape it before display.
        return (
            gr.update(value=f"<p style='color:#ef4444'>{html.escape(str(e))}</p>"),
            gr.update(value=""),
        )

    filters = {}
    if date_from and int(date_from) > slider_min_year:
        filters["date_from"] = int(date_from)
    if date_to and int(date_to) < slider_max_year:
        filters["date_to"] = int(date_to)
    if jurisdiction and jurisdiction != "Tous":
        filters["jurisdiction"] = jurisdiction
    if code_name and code_name != "Tous":
        filters["code_name"] = code_name
    if ministere and ministere != "Tous":
        filters["ministere"] = ministere

    results = search_all(embedding, DATASETS, source_filter=source_filter, filters=filters)

    # Cross-references: pair every article hit with the decisions citing it
    juris_ds = DATASETS.get("jurisprudence")
    enriched_articles = [
        (r, find_related_decisions(r.get("id_legifrance", ""), juris_ds))
        for r in results.get("articles", [])
    ]

    # Build synthesis (guard against a None answer before concatenating)
    synthesis_text = (synthesize(query, results, HF_TOKEN) or "") + warning_note

    tabs_html = build_tabs_html(results, LOADING_STATUS)

    # The tab builder renders plain article cards; swap them for the enriched
    # ones only when at least one article actually has related decisions.
    if any(related for _, related in enriched_articles):
        plain_article_html = "".join(build_article_card(r) for r, _ in enriched_articles)
        article_html = "".join(
            build_article_card(r, related) for r, related in enriched_articles
        )
        tabs_html = tabs_html.replace(plain_article_html, article_html)

    # LLM output is untrusted text: escape it so it cannot inject markup.
    synthesis_html = f"""
    <div style="font-family:system-ui,sans-serif;background:#f8fafc;border-radius:8px;
    padding:16px 20px;border-left:4px solid #2563eb;margin-bottom:16px">
    <p style="font-size:13px;font-weight:700;color:#2563eb;margin:0 0 10px">Synthèse juridique</p>
    <div style="font-size:14px;line-height:1.7;color:#1e293b;white-space:pre-wrap">{html.escape(synthesis_text)}</div>
    </div>"""

    return gr.update(value=synthesis_html), gr.update(value=tabs_html)
113
+
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # Gradio layout
117
+ # ---------------------------------------------------------------------------
118
# load_all_datasets() has already returned when this module-level code runs, so
# a False status means the source failed to load, not that it is still loading.
_failed_sources = [name for name, ok in LOADING_STATUS.items() if not ok]
LOADING_MSG = (
    "Sources temporairement indisponibles : " + ", ".join(_failed_sources)
    if _failed_sources else ""
)

with gr.Blocks(
    title="enirtcod.fr — Recherche juridique française",
    css="""
    .gradio-container { max-width: 1100px !important; }
    footer { display: none !important; }
    """,
) as demo:

    gr.HTML("""
    <div style="text-align:center;padding:24px 0 12px;font-family:system-ui,sans-serif">
    <h1 style="font-size:28px;font-weight:800;color:#1e293b;margin:0">⚖️ enirtcod.fr</h1>
    <p style="color:#64748b;font-size:14px;margin:6px 0 0">
    Alternative open-source à Doctrine.fr · Recherche dans 4 sources juridiques françaises
    </p>
    </div>""")

    if LOADING_MSG:
        gr.HTML(f"""
        <div style="background:#fef3c7;border:1px solid #f59e0b;border-radius:8px;
        padding:10px 16px;font-size:13px;color:#92400e;text-align:center">
        ⚠️ {LOADING_MSG}
        </div>""")

    with gr.Row():
        query_box = gr.Textbox(
            placeholder="Ex : Quelles sont les conditions de la responsabilité civile délictuelle ?",
            label="Question juridique",
            lines=2,
            scale=5,
        )
        source_selector = gr.Dropdown(
            choices=["Tous", "Articles", "Jurisprudence", "Circulaires", "Q&R"],
            value="Tous",
            label="Source",
            scale=1,
        )

    search_btn = gr.Button("🔍 Rechercher", variant="primary")

    with gr.Accordion("Filtres avancés", open=False):
        with gr.Row():
            date_from = gr.Slider(minimum=2000, maximum=2026, step=1, value=2000, label="Année depuis")
            date_to = gr.Slider(minimum=2000, maximum=2026, step=1, value=2026, label="Année jusqu'à")
        with gr.Row():
            juris_filter = gr.Dropdown(
                choices=["Tous", "Cour de cassation", "Cour d'appel"],
                value="Tous",
                label="Juridiction",
            )
            code_filter = gr.Dropdown(
                choices=["Tous"] + _code_names,
                value="Tous",
                label="Code juridique",
            )
            min_filter = gr.Dropdown(
                choices=["Tous"] + _ministeres,
                value="Tous",
                label="Ministère",
            )

    synthesis_out = gr.HTML(label="Synthèse")
    results_out = gr.HTML(label="Résultats")

    # The button click and the textbox submit trigger the same handler with
    # the same wiring, so the component lists are declared once.
    _search_inputs = [query_box, source_selector, date_from, date_to,
                      juris_filter, code_filter, min_filter]
    _search_outputs = [synthesis_out, results_out]

    search_btn.click(fn=run_search, inputs=_search_inputs, outputs=_search_outputs)
    query_box.submit(fn=run_search, inputs=_search_inputs, outputs=_search_outputs)

if __name__ == "__main__":
    demo.launch()
data_loader.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ data_loader.py — Dataset loading + FAISS index construction + query embedding.
3
+
4
+ Runs at Space startup (once). Each dataset gets a FAISS index built in memory.
5
+ Graceful degradation: if one source fails, the others continue.
6
+ """
7
+
8
+ import os
9
+ import numpy as np
10
+ from datasets import load_dataset
11
+ from huggingface_hub import InferenceClient
12
+
13
DATASET_REPO = "ArthurSrz/open_codes"
# NOTE(review): "mistral-embed" looks like a Mistral API model name rather than
# a Hugging Face Hub repo id — confirm it is reachable through InferenceClient.
EMBED_MODEL = "mistral-embed"
EMBED_DIM = 1024

# Tracks which sources loaded successfully
LOADING_STATUS: dict[str, bool] = {
    "articles": False,
    "jurisprudence": False,
    "circulaires": False,
    "reponses": False,
}

_datasets: dict = {}


def load_all_datasets() -> dict:
    """
    Load the four dataset configs of DATASET_REPO and index their embeddings.

    For each source the "train" split is loaded and a FAISS index is added on
    the "embedding" column. A source that fails to load is reported, mapped to
    None, and does not stop the remaining sources from loading.

    Side effects: updates LOADING_STATUS and the module-level ``_datasets``.

    Returns:
        dict with keys articles, jurisprudence, circulaires, reponses; each
        value is the indexed dataset, or None when loading failed.
    """
    # Result key -> dataset config name (insertion order is the load order)
    config_by_source = {
        "articles": "default",
        "jurisprudence": "jurisprudence",
        "circulaires": "circulaires",
        "reponses": "reponses_legis",
    }

    loaded: dict = {}

    for source_key, config_name in config_by_source.items():
        dataset = None
        try:
            print(f"[data_loader] Loading {config_name}…")
            dataset = load_dataset(DATASET_REPO, name=config_name, split="train")
            dataset.add_faiss_index(column="embedding")
            print(f"[data_loader] ✓ {config_name}: {len(dataset)} rows, FAISS index built")
        except Exception as e:
            print(f"[data_loader] ✗ {config_name} failed: {e}")
            dataset = None  # a half-initialised dataset is treated as missing

        loaded[source_key] = dataset
        LOADING_STATUS[source_key] = dataset is not None

    _datasets.update(loaded)
    return loaded
58
+
59
+
60
def embed_query(query_text: str, hf_token: str) -> list[float]:
    """
    Embed a query string with EMBED_MODEL through the HF Inference API.

    Args:
        query_text: the user query; must contain non-whitespace text.
        hf_token: token passed to ``InferenceClient``.

    Returns:
        The embedding flattened to a list of EMBED_DIM floats.

    Raises:
        ValueError: with a user-readable message when the query is blank, the
            API call fails, or the returned vector has an unexpected size.
    """
    # Reject blank input locally instead of spending an API call on it.
    if not query_text or not query_text.strip():
        raise ValueError("Impossible d'encoder la requête : la requête est vide.")

    try:
        client = InferenceClient(token=hf_token)
        response = client.feature_extraction(
            text=query_text,
            model=EMBED_MODEL,
        )
        # Flatten whatever array-like came back to a 1D list
        embedding = np.array(response).flatten().tolist()
    except Exception as e:
        raise ValueError(
            f"Impossible d'encoder la requête : {e}. "
            "Vérifiez que HF_TOKEN est configuré et que le quota API n'est pas dépassé."
        ) from e

    # Checked outside the try block so a size mismatch is not re-wrapped in the
    # token/quota hint above, which would point the user at the wrong cause.
    if len(embedding) != EMBED_DIM:
        raise ValueError(
            "Impossible d'encoder la requête : dimension d'embedding inattendue "
            f"(attendu {EMBED_DIM}, reçu {len(embedding)})."
        )
    return embedding
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ datasets>=2.14.0
3
+ huggingface_hub>=0.20.0
4
+ faiss-cpu>=1.7.4
5
+ mistralai>=1.0.0
6
+ numpy>=1.24.0
search.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ search.py — FAISS retrieval + post-retrieval filtering across 4 legal sources.
3
+ """
4
+
5
+ import numpy as np
6
+
7
+
8
def search_source(ds, query_embedding: list[float], k: int, source_type: str,
                  oversample: int = 5) -> list[dict]:
    """
    Run a FAISS nearest-neighbour search on a single dataset.

    Args:
        ds: dataset exposing ``get_nearest_examples`` with an index on the
            "embedding" column, or None when the source is unavailable.
        query_embedding: the query vector.
        k: maximum number of rows to return.
        source_type: label stored in each row under "source_type".
        oversample: the index is asked for ``k * oversample`` candidates
            (default 5, the previously hard-coded factor).

    Returns:
        At most ``k`` result dicts (dataset columns plus "source_type" and
        "score"), in the order returned by the index. Empty list when the
        source is missing, ``k`` is not positive, or the search fails.

    Note: truncation to ``k`` happens here, before any filtering; callers that
    need headroom for post-filtering must request a larger ``k``.
    """
    if ds is None or k <= 0:
        return []

    try:
        scores, results = ds.get_nearest_examples(
            "embedding",
            np.array(query_embedding, dtype=np.float32),
            k=k * max(1, oversample),
        )
    except Exception as e:
        # Best effort: one failing source must not break the whole search.
        print(f"[search] FAISS error on {source_type}: {e}")
        return []

    columns = list(results)
    rows = []
    # Only the first k hits are materialised; the rest would be discarded.
    for i, score in enumerate(scores[:k]):
        row = {col: results[col][i] for col in columns}
        row["source_type"] = source_type
        row["score"] = float(score)
        rows.append(row)

    return rows
33
+
34
+
35
def _result_year(result: dict):
    """Return the year of the first non-empty date field of *result*, or None."""
    raw_date = (
        result.get("article_dateDebut")
        or result.get("date_decision")
        or result.get("date_parution")
        or result.get("date_reponse")
        or ""
    )
    try:
        return int(str(raw_date)[:4])
    except (ValueError, TypeError):
        return None


def apply_filters(results: list[dict], filters: dict) -> list[dict]:
    """
    Apply post-retrieval filters. Every filter is optional (missing/empty = skip).

    - date_from / date_to: int years, applied to all source types
    - jurisdiction: string, applied to jurisprudence only
    - code_name: string, applied to articles only
    - ministere: string, applied to circulaires and reponses

    Returns a new list with the rows that pass every active filter, in their
    original order. Rows whose date cannot be parsed are kept by the date
    filter. A None or empty ``filters`` keeps every row.
    """
    if not filters:
        return list(results)

    # Loop-invariant lookups, read once instead of once per row
    date_from = filters.get("date_from")
    date_to = filters.get("date_to")
    jurisdiction = filters.get("jurisdiction")
    code_name = filters.get("code_name")
    ministere = filters.get("ministere")

    out = []
    for r in results:
        source = r.get("source_type", "")

        # Date filter (year-based, applied to all)
        if date_from or date_to:
            year = _result_year(r)
            if year is not None:  # keep if date unparseable
                if date_from and year < date_from:
                    continue
                if date_to and year > date_to:
                    continue

        # Jurisdiction filter (jurisprudence only)
        if jurisdiction and source == "jurisprudence" and r.get("jurisdiction") != jurisdiction:
            continue

        # Code filter (articles only)
        if code_name and source == "articles" and r.get("code_name") != code_name:
            continue

        # Ministry filter (circulaires + reponses)
        if ministere and source in ("circulaires", "reponses") and r.get("ministere") != ministere:
            continue

        out.append(r)
    return out
82
+
83
+
84
def search_all(
    query_embedding: list[float],
    datasets_dict: dict,
    source_filter: str = "Tous",
    filters: dict | None = None,
) -> dict:
    """
    Run the search across all loaded datasets.

    Args:
        query_embedding: the query vector.
        datasets_dict: source key -> indexed dataset (or None).
        source_filter: "Tous" | "Articles" | "Jurisprudence" | "Circulaires" | "Q&R".
            Any other value is treated like "Tous".
        filters: optional post-retrieval filters (see ``apply_filters``).

    Returns:
        dict: {articles: [...], jurisprudence: [...], circulaires: [...], reponses: [...]}
        Sources excluded by ``source_filter`` map to an empty list.
    """
    all_sources = ["articles", "jurisprudence", "circulaires", "reponses"]

    source_map = {
        "Articles": ["articles"],
        "Jurisprudence": ["jurisprudence"],
        "Circulaires": ["circulaires"],
        "Q&R": ["reponses"],
    }
    # "Tous" (and any unknown label) is not in the map and selects everything
    active_sources = source_map.get(source_filter, all_sources)

    # Number of results shown per source
    k_map = {"articles": 3, "jurisprudence": 3, "circulaires": 2, "reponses": 1}
    # Extra candidates fetched per wanted result when filters are active
    filter_headroom = 5

    result = {}
    for source in all_sources:
        if source not in active_sources:
            result[source] = []
            continue

        ds = datasets_dict.get(source)
        k = k_map[source]
        if filters:
            # Filter BEFORE truncating to k: filtering an already-truncated
            # top-k list returns too few results (often none) even when
            # matching documents exist further down the ranking.
            candidates = search_source(ds, query_embedding, k * filter_headroom, source)
            result[source] = apply_filters(candidates, filters)[:k]
        else:
            result[source] = search_source(ds, query_embedding, k, source)

    return result
123
+
124
+
125
def find_related_decisions(article_id_legifrance: str, juris_ds, limit: int = 3) -> list[dict]:
    """
    Find jurisprudence chunks whose chunk_text mentions a given article ID.

    This is a plain substring match that scans ``juris_ds`` row by row and
    stops at the first ``limit`` matches; nothing is precomputed or cached.

    Args:
        article_id_legifrance: identifier to look for; empty means no lookup.
        juris_ds: iterable of row dicts, or None when the source is unavailable.
        limit: maximum number of decisions returned (default 3).

    Returns:
        List of dicts with keys jurisdiction, date_decision, solution,
        url_judilibre and chunk_text (truncated to 300 characters). Missing
        or None field values are returned as "".
    """
    if juris_ds is None or not article_id_legifrance or limit <= 0:
        return []

    related = []
    for row in juris_ds:
        text = row.get("chunk_text") or ""
        if article_id_legifrance not in text:
            continue
        related.append({
            # `or ""` also covers keys present with a None value
            "jurisdiction": row.get("jurisdiction") or "",
            "date_decision": row.get("date_decision") or "",
            "solution": row.get("solution") or "",
            "url_judilibre": row.get("url_judilibre") or "",
            "chunk_text": text[:300],
        })
        if len(related) >= limit:
            break

    return related
synthesis.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ synthesis.py — LLM synthesis with inline French legal citations.
3
+ """
4
+
5
+ from huggingface_hub import InferenceClient
6
+
7
GENERATION_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

SYSTEM_PROMPT = """Tu es un assistant juridique français expert. Réponds à la question en te basant UNIQUEMENT sur les extraits numérotés fournis. N'utilise aucune connaissance extérieure.

Pour chaque affirmation, cite la source entre crochets selon le style juridique français :
- Articles de loi : [Code civil, art. 1240] ou [C. trav., art. L.1237-19]
- Décisions de justice : [Cass. 1re civ., 13 avr. 2023, n° 21-20.145] ou [CA Paris, 15 janv. 2024]
- Circulaires : [Circ. n° 2023-045, ministère du Travail]
- Réponses ministérielles : [Q. n° 12345, ministère de la Justice]

Si les extraits ne permettent pas de répondre à la question, réponds exactement : "Aucun résultat pertinent trouvé pour cette requête."
Réponds en français, en 3 à 6 phrases de prose juridique claire et structurée."""


def format_context_for_llm(results_dict: dict) -> str:
    """
    Render the retrieved chunks as one numbered context string for the LLM.

    Sources are walked in a fixed order (articles, jurisprudence, circulaires,
    reponses). Each chunk becomes ``[n] (citation)`` followed by the first 500
    characters of its chunk_text, with ``n`` counting from 1 across all
    sources. Entries are separated by a blank line.
    """
    # (source key, function building the citation label for that source)
    citation_builders = (
        ("articles", _article_citation_key),
        ("jurisprudence", _decision_citation_key),
        ("circulaires", _circulaire_citation_key),
        ("reponses", _reponse_citation_key),
    )

    entries = []
    for source_name, build_citation in citation_builders:
        for hit in results_dict.get(source_name, []):
            position = len(entries) + 1  # running number across all sources
            excerpt = (hit.get("chunk_text") or "")[:500]
            label = build_citation(hit)
            entries.append(f"[{position}] ({label})\n{excerpt}")

    return "\n\n".join(entries)
44
+
45
+
46
def synthesize(query: str, results_dict: dict, hf_token: str) -> str:
    """
    Generate a prose synthesis with inline citations using GENERATION_MODEL.

    Args:
        query: the user question.
        results_dict: source key -> list of retrieved chunks.
        hf_token: token passed to ``InferenceClient``.

    Returns:
        The model answer; the fixed no-result message when there is nothing to
        synthesise or the model returns no text; or an error message (never an
        exception) when the API call fails. Always a string.
    """
    no_result = "Aucun résultat pertinent trouvé pour cette requête."

    # `not any(...)` also treats a None source as empty (len(None) would raise)
    if not any(results_dict.values()):
        return no_result

    context = format_context_for_llm(results_dict)

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Question : {query}\n\nExtraits :\n{context}"},
    ]

    try:
        client = InferenceClient(token=hf_token)
        response = client.chat_completion(
            model=GENERATION_MODEL,
            messages=messages,
            max_tokens=1024,
        )
        answer = response.choices[0].message.content
    except Exception as e:
        # Best effort: the search results remain useful without a synthesis.
        return f"Erreur lors de la synthèse : {e}"

    # Callers concatenate the result with other strings, so never return None.
    if not answer or not answer.strip():
        return no_result
    return answer
72
+
73
+
74
+ # --- Citation key helpers ---
75
+
76
+ def _article_citation_key(r: dict) -> str:
77
+ code = r.get("code_name", "Code")
78
+ num = r.get("num", r.get("id_legifrance", "?"))
79
+ return f"{code}, art. {num}"
80
+
81
+
82
+ def _decision_citation_key(r: dict) -> str:
83
+ juris = r.get("jurisdiction", "Cass.")
84
+ date = r.get("date_decision", "")
85
+ num = r.get("source_id", r.get("id_judilibre", ""))
86
+ return f"{juris}, {date}, n° {num}"
87
+
88
+
89
+ def _circulaire_citation_key(r: dict) -> str:
90
+ num = r.get("numero", r.get("source_id", "?"))
91
+ min_ = r.get("ministere", "")
92
+ return f"Circ. n° {num}, {min_}"
93
+
94
+
95
+ def _reponse_citation_key(r: dict) -> str:
96
+ num = r.get("numero_question", r.get("source_id", "?"))
97
+ return f"Q. n° {num}"
ui_components.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ui_components.py — HTML card builders for each legal source type + tab panel.
3
+ """
4
+
5
+
6
+ def build_article_card(result: dict, related_decisions: list[dict] | None = None) -> str:
7
+ code = result.get("code_name", "Code")
8
+ num = result.get("num", result.get("id_legifrance", ""))
9
+ snippet = (result.get("chunk_text") or "")[:200]
10
+ date = (result.get("article_dateDebut") or "")[:10]
11
+ lf_id = result.get("id_legifrance", "")
12
+ url = f"https://www.legifrance.gouv.fr/codes/article_lc/{lf_id}" if lf_id else "#"
13
+ etat = result.get("article_etat", "")
14
+
15
+ etat_badge = f'<span style="font-size:11px;color:#6b7280;margin-left:6px">{etat}</span>' if etat else ""
16
+
17
+ cross_ref_html = ""
18
+ if related_decisions:
19
+ mini_cards = ""
20
+ for dec in related_decisions:
21
+ dec_url = dec.get("url_judilibre", "#")
22
+ dec_date = dec.get("date_decision", "")
23
+ dec_jur = dec.get("jurisdiction", "")
24
+ dec_snip = dec.get("chunk_text", "")[:120]
25
+ mini_cards += f"""
26
+ <div style="border-left:3px solid #6366f1;padding:6px 10px;margin-top:6px;font-size:12px;color:#374151">
27
+ <strong>{dec_jur}</strong> · {dec_date}
28
+ <div style="color:#6b7280;margin-top:2px">{dec_snip}…</div>
29
+ <a href="{dec_url}" target="_blank" style="color:#6366f1;font-size:11px">→ Cour de cassation</a>
30
+ </div>"""
31
+ n = len(related_decisions)
32
+ cross_ref_html = f"""
33
+ <details style="margin-top:8px">
34
+ <summary style="cursor:pointer;color:#6366f1;font-size:13px;font-weight:600">
35
+ Voir les décisions ({n})
36
+ </summary>
37
+ {mini_cards}
38
+ </details>"""
39
+
40
+ return f"""
41
+ <div data-article-id="{lf_id}" style="border:1px solid #e5e7eb;border-radius:8px;padding:12px 16px;margin-bottom:10px;background:#fff">
42
+ <div style="display:flex;align-items:center;gap:8px;margin-bottom:6px">
43
+ <span style="background:#dbeafe;color:#1d4ed8;font-size:11px;font-weight:700;padding:2px 8px;border-radius:12px">{code}</span>
44
+ <strong style="font-size:14px">Art. {num}</strong>{etat_badge}
45
+ </div>
46
+ <p style="font-size:13px;color:#374151;margin:0 0 8px">{snippet}…</p>
47
+ <div style="font-size:12px;color:#9ca3af">
48
+ 📅 {date} &nbsp;|&nbsp;
49
+ <a href="{url}" target="_blank" style="color:#2563eb">🔗 Légifrance</a>
50
+ </div>
51
+ {cross_ref_html}
52
+ </div>"""
53
+
54
+
55
def build_decision_card(result: dict) -> str:
    """
    Render one jurisprudence search hit as an HTML card.

    Reads jurisdiction, chamber, date_decision, fiche_arret (preferred snippet)
    or chunk_text, url_judilibre and source_id from ``result``.

    Returns:
        The card as an HTML string. Every value coming from the data is
        HTML-escaped; missing/None values render as empty text and a missing
        URL falls back to "#".
    """
    from html import escape  # function-scope import keeps the block self-contained

    def _txt(value, limit=None) -> str:
        """Stringify, optionally truncate, then HTML-escape a field value."""
        raw = "" if value is None else str(value)
        return escape(raw if limit is None else raw[:limit])

    juris = _txt(result.get("jurisdiction"))
    chamber = _txt(result.get("chamber"))
    date = _txt(result.get("date_decision"))
    # The case summary is preferred over the raw chunk when available
    snippet = _txt(result.get("fiche_arret") or result.get("chunk_text"), 200)
    url = _txt(result.get("url_judilibre") or "#")
    src_id = _txt(result.get("source_id"))

    badge_label = f"{juris}" + (f" | {chamber}" if chamber else "")
    number_tag = f'<span style="font-size:11px;color:#6b7280">n° {src_id}</span>' if src_id else ""

    return f"""
    <div style="border:1px solid #e5e7eb;border-radius:8px;padding:12px 16px;margin-bottom:10px;background:#fff">
    <div style="display:flex;align-items:center;gap:8px;margin-bottom:6px">
    <span style="background:#fce7f3;color:#be185d;font-size:11px;font-weight:700;padding:2px 8px;border-radius:12px">{badge_label}</span>
    <strong style="font-size:13px">{date}</strong>
    {number_tag}
    </div>
    <p style="font-size:13px;color:#374151;margin:0 0 8px">{snippet}…</p>
    <div style="font-size:12px;color:#9ca3af">
    📅 {date} &nbsp;|&nbsp;
    <a href="{url}" target="_blank" style="color:#2563eb">🔗 Cour de cassation</a>
    </div>
    </div>"""
79
+
80
+
81
def build_circulaire_card(result: dict) -> str:
    """
    Render one circulaire search hit as an HTML card.

    Reads ministere, numero (or source_id), objet (or chunk_text),
    date_parution and url_legifrance from ``result``.

    Returns:
        The card as an HTML string. Every value coming from the data is
        HTML-escaped; missing/None values render as empty text and a missing
        URL falls back to "#".
    """
    from html import escape  # function-scope import keeps the block self-contained

    def _txt(value, limit=None) -> str:
        """Stringify, optionally truncate, then HTML-escape a field value."""
        raw = "" if value is None else str(value)
        return escape(raw if limit is None else raw[:limit])

    ministere = _txt(result.get("ministere"))
    numero = _txt(result.get("numero") or result.get("source_id"))
    objet = _txt(result.get("objet") or result.get("chunk_text"), 200)
    date = _txt(result.get("date_parution"), 10)  # keep the date part only
    url = _txt(result.get("url_legifrance") or "#")

    return f"""
    <div style="border:1px solid #e5e7eb;border-radius:8px;padding:12px 16px;margin-bottom:10px;background:#fff">
    <div style="display:flex;align-items:center;gap:8px;margin-bottom:6px">
    <span style="background:#d1fae5;color:#065f46;font-size:11px;font-weight:700;padding:2px 8px;border-radius:12px">Ministère : {ministere}</span>
    <strong style="font-size:13px">Circ. n° {numero}</strong>
    </div>
    <p style="font-size:13px;color:#374151;margin:0 0 8px">{objet}…</p>
    <div style="font-size:12px;color:#9ca3af">
    📅 {date} &nbsp;|&nbsp;
    <a href="{url}" target="_blank" style="color:#2563eb">🔗 Légifrance</a>
    </div>
    </div>"""
100
+
101
+
102
def build_reponse_card(result: dict) -> str:
    """
    Render one ministerial answer search hit as an HTML card.

    Reads ministere, numero_question (or source_id), question_text (or
    chunk_text), date_reponse and url_legifrance from ``result``.

    Returns:
        The card as an HTML string. Every value coming from the data is
        HTML-escaped; missing/None values render as empty text and a missing
        URL falls back to "#".
    """
    from html import escape  # function-scope import keeps the block self-contained

    def _txt(value, limit=None) -> str:
        """Stringify, optionally truncate, then HTML-escape a field value."""
        raw = "" if value is None else str(value)
        return escape(raw if limit is None else raw[:limit])

    ministere = _txt(result.get("ministere"))
    num_q = _txt(result.get("numero_question") or result.get("source_id"))
    question = _txt(result.get("question_text") or result.get("chunk_text"), 200)
    date = _txt(result.get("date_reponse"), 10)  # keep the date part only
    url = _txt(result.get("url_legifrance") or "#")

    return f"""
    <div style="border:1px solid #e5e7eb;border-radius:8px;padding:12px 16px;margin-bottom:10px;background:#fff">
    <div style="display:flex;align-items:center;gap:8px;margin-bottom:6px">
    <span style="background:#fef3c7;color:#92400e;font-size:11px;font-weight:700;padding:2px 8px;border-radius:12px">{ministere}</span>
    <strong style="font-size:13px">Q. n° {num_q}</strong>
    </div>
    <p style="font-size:13px;color:#374151;margin:0 0 8px">{question}…</p>
    <div style="font-size:12px;color:#9ca3af">
    📅 {date} &nbsp;|&nbsp;
    <a href="{url}" target="_blank" style="color:#2563eb">🔗 Légifrance</a>
    </div>
    </div>"""
121
+
122
+
123
def build_tabs_html(results_dict: dict, loading_status: dict) -> str:
    """
    Build a 4-tab HTML panel. Each tab shows its result count in the label.

    Args:
        results_dict: source key -> list of result rows.
        loading_status: source key -> True when the source loaded.

    Returns:
        HTML string with one button and one panel per source. A source that
        failed to load shows 'Source temporairement indisponible'; a loaded
        source without hits shows 'Aucun résultat pour cette source.'.

    Tab switching is implemented in each button's inline ``onclick`` handler:
    a ``<script>`` element inserted through innerHTML (which is how the markup
    reaches the page) is never executed, so a function defined in one would be
    undefined when a button is clicked.
    """
    from html import escape  # function-scope import keeps the block self-contained

    tabs_config = [
        ("articles", "Articles", build_article_card),
        ("jurisprudence", "Jurisprudence", build_decision_card),
        ("circulaires", "Circulaires", build_circulaire_card),
        ("reponses", "Q&R", build_reponse_card),
    ]

    # JS array literal of all tab keys, e.g. 'articles','jurisprudence',...
    keys_js = ",".join(f"'{key}'" for key, _, _ in tabs_config)

    def _onclick(selected: str) -> str:
        """Return the inline JS that shows tab *selected* and hides the others."""
        # Only single quotes are used so the code fits a double-quoted attribute.
        return (
            f"var s='{selected}';[{keys_js}].forEach(function(k){{"
            "var on=(k===s);"
            "document.getElementById('tab-'+k).style.display=on?'block':'none';"
            "var b=document.getElementById('tab-btn-'+k);"
            "b.style.background=on?'#eff6ff':'transparent';"
            "b.style.color=on?'#1d4ed8':'#6b7280';"
            "b.style.fontWeight=on?'700':'400';"
            "b.style.borderBottom=on?'2px solid #1d4ed8':'2px solid transparent';"
            "});"
        )

    tab_buttons = ""
    tab_panels = ""

    for i, (key, label, builder) in enumerate(tabs_config):
        results = results_dict.get(key) or []
        count = len(results)
        is_first = i == 0  # the first tab is the one shown initially

        tab_buttons += f"""
        <button onclick="{_onclick(key)}" id="tab-btn-{key}"
        style="padding:8px 16px;border:none;background:{'#eff6ff' if is_first else 'transparent'};
        color:{'#1d4ed8' if is_first else '#6b7280'};font-weight:{'700' if is_first else '400'};
        border-bottom:{'2px solid #1d4ed8' if is_first else '2px solid transparent'};
        cursor:pointer;font-size:14px;border-radius:4px 4px 0 0">
        {escape(label)} ({count})
        </button>"""

        if not loading_status.get(key, False):
            content = '<p style="color:#9ca3af;font-style:italic;padding:20px">Source temporairement indisponible</p>'
        elif not results:
            content = '<p style="color:#9ca3af;font-style:italic;padding:20px">Aucun résultat pour cette source.</p>'
        else:
            # Cards are concatenated without separators on purpose: callers may
            # locate and replace the joined card markup as a single substring.
            content = "".join(builder(r) for r in results)

        display = "block" if is_first else "none"
        tab_panels += f"""
        <div id="tab-{key}" style="display:{display};padding:16px 0">
        {content}
        </div>"""

    return f"""
    <div style="font-family:system-ui,sans-serif">
    <div style="border-bottom:1px solid #e5e7eb;display:flex;gap:4px">
    {tab_buttons}
    </div>
    {tab_panels}
    </div>"""