ValerioBotto commited on
Commit
aabd32c
·
0 Parent(s):

Initial clean commit without secrets

Browse files
.gitignore ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- PYTHON & BACKEND ---
2
+ venv/
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+ .env
7
+ temp_uploads/
8
+ .pytest_cache/
9
+
10
+ # --- NODE & FRONTEND ---
11
+ node_modules/
12
+ frontend/node_modules/
13
+ frontend/dist/
14
+ frontend/build/
15
+ .eslintcache
16
+
17
+ # --- OS & IDE ---
18
+ .DS_Store
19
+ Thumbs.db
20
+ .vscode/
21
+ .idea/
22
+ *.log
23
+
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # GraphRAG
2
+ Sistema RAG avanzato che combina Neo4j e LangGraph per analisi deterministiche di PDF. Sfrutta Mistral come router strategico per query vettoriali o Cypher, GLiNER per l’estrazione di entità e BGE-Reranker per la precisione. Frontend in React con rendering Markdown e Backend in FastAPI. PDF interrogabili in chat in linguaggio naturale.
agentLogic/graph.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, END
2
+ from agentLogic.state import AgentState
3
+ from agentLogic.nodes import node_router, node_retriever, node_generator, node_reranker, node_rewriter
4
+
5
# Build the LangGraph pipeline as a strictly linear chain:
# rewriter -> router -> retriever -> reranker -> generator -> END.
workflow = StateGraph(AgentState)

# Stages listed in execution order; the same list drives both node
# registration and edge wiring, so the two can never drift apart.
_PIPELINE_STAGES = [
    ("rewriter", node_rewriter),
    ("router", node_router),
    ("retriever", node_retriever),
    ("reranker", node_reranker),
    ("generator", node_generator),
]

# Register the nodes.
for _stage_name, _stage_fn in _PIPELINE_STAGES:
    workflow.add_node(_stage_name, _stage_fn)

# Define the path: the first stage is the entry point, every stage feeds
# the next one, and the last stage feeds END.
_stage_names = [_stage_name for _stage_name, _ in _PIPELINE_STAGES]
workflow.set_entry_point(_stage_names[0])
for _source, _target in zip(_stage_names, _stage_names[1:] + [END]):
    workflow.add_edge(_source, _target)

app = workflow.compile()
23
+
24
+
25
+
agentLogic/nodes.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Questo file contiene la logica per i due modelli
2
+
3
+ import os
4
+ import json
5
+ import re
6
+ import logging
7
+ from groq import Groq
8
+ from mistralai import Mistral
9
+ from agentLogic.state import AgentState
10
+ from db.graph_db import GraphDB
11
+ from processingPdf.reranker import Reranker
12
+ from processingPdf.indexer import Indexer
13
+
14
# Module-level logger for the agent nodes.
logger = logging.getLogger(__name__)

# LLM API clients; the API keys are read from the environment.
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
mistral_client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

# The heavy models are instantiated outside the nodes so they are loaded
# only once, at import time, instead of on every request.
# This is essential for FastAPI performance.
indexer_instance = Indexer()
reranker_model = Reranker()
23
+
24
+ #Estrae e pulisce il JSON dall'output dell'LLM
25
def extract_json(text):
    """Extract the first valid JSON object embedded in an LLM reply.

    The reply is scanned for ``{`` characters and a JSON object is decoded
    starting at each one until a decode succeeds. Unlike a greedy
    ``{...}`` regex, this tolerates prose, code fences or stray braces
    before and after the object.

    Args:
        text: Raw model output. Anything that is not a ``str`` is treated
            as unparseable.

    Returns:
        The decoded ``dict``, or the default intent
        ``{"route": "vector", "entities": [], "keywords": []}`` when no
        JSON object can be decoded.
    """
    fallback = {"route": "vector", "entities": [], "keywords": []}

    if not isinstance(text, str):
        logger.error(f"Errore nel parsare il JSON: input non testuale ({type(text).__name__})")
        return fallback

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the object and ignores what follows.
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a valid object at this brace: try the next candidate.
            start = text.find("{", start + 1)
            continue
        # Decoding started on "{", so the result is always a dict.
        return parsed

    logger.error("Errore nel parsare il JSON: nessun oggetto JSON valido trovato")
    return fallback
35
+
36
+ #Nodo rewriter: Pulisce la query, corregge errori e agisce da Guardrail. precedentemente aveva anche una funzione di
37
+ # ampliamento contestuale ma ho deciso di eliminare l'espansione semantica forzata per evitare di compromettere il contesto del RAG come successo in fase di testing
38
def node_rewriter(state: AgentState):
    """Rewriter node: fix spelling/grammar mistakes in the user query.

    The query is sent to the Groq chat model with a prompt that asks for a
    minimal correction only (no added context, synonyms or interpretation).

    Args:
        state: Current agent state; only ``state["query"]`` is read.

    Returns:
        A partial state update ``{"query": <corrected query>}``. When the
        model returns no usable text, the original query is kept so the
        downstream nodes never receive an empty question.
    """
    user_query = state["query"]

    prompt = f"""
### ROLE
Sei un correttore ortografico e/o grammaticale e sintattico. Il tuo unico output deve essere la query corretta.

### TASK
1. **Correzione**: Correggi eventuali errori di ortografia, sintassi, grammaticali o di battitura.
2. **Minimalismo**: NON AGGIUNGERE contesto, sinonimi o interpretazioni. Se la domanda è chiara (es. "cos'è la persona"), lasciala IDENTICA.

### ESEMPI
Input: "cos'è la 'persona'?" -> Output: cos'è la 'persona'?
Input: "spiegami il prompt pstern" -> Output: spiegami il prompt pattern
Input: "Quali sono i sui sinotmi?" -> Output: Quali sono i suoi sintomi?

USER QUERY: "{user_query}"
OUTPUT:
"""

    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": "Sei un correttore di testo puro. Non salutare. Non spiegare. Restituisci SOLO il risultato."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.0
    )

    # The content may be None; treat that like an empty reply.
    raw_output = completion.choices[0].message.content or ""

    # Extra cleanup: drop an echoed "Output:" label and any double quotes.
    rewritten_query = raw_output.replace('Output:', '').replace('"', '').strip()

    # Never replace the user's question with an empty string.
    if not rewritten_query:
        rewritten_query = user_query

    print(f"DEBUG - Query Rewriting: '{user_query}' -> '{rewritten_query}'")

    return {"query": rewritten_query}
74
+
75
+ #Nodo 1: utilizzo Mistral per decidere la strategia di ricerca
76
def _normalize_intent(intent):
    """Coerce the router output into the shape the retriever relies on."""
    normalized = dict(intent) if isinstance(intent, dict) else {}

    # An unknown, missing or differently-cased route would make the retriever
    # skip both search branches; default to the semantic search instead.
    route = str(normalized.get("route", "")).strip().lower()
    normalized["route"] = route if route in ("cypher", "vector", "hybrid") else "vector"

    # "entities" and "keywords" must always be lists.
    for key in ("entities", "keywords"):
        value = normalized.get(key)
        if isinstance(value, str):
            normalized[key] = [value]
        elif not isinstance(value, list):
            normalized[key] = []

    return normalized


def node_router(state: AgentState):
    """Router node: ask Mistral which retrieval strategy fits the query.

    The prompt combines role prompting, few-shot examples, hard constraints
    and a required JSON output structure.

    Args:
        state: Current agent state; only ``state["query"]`` is read.

    Returns:
        A partial state update ``{"intent_data": {...}}`` whose dict always
        has a ``route`` in ``{"cypher", "vector", "hybrid"}`` plus list-valued
        ``entities`` and ``keywords``.
    """

    # Note: double braces {{ }} are used to embed literal JSON in the f-string.
    prompt = f"""
### ROLE
Sei l'Analizzatore Logico di un sistema RAG (Retrieval-Augmented Generation) avanzato.
Il tuo unico compito è decostruire la domanda dell'utente per determinare la migliore strategia di recupero dati da un database a grafo Neo4j.

### DOMINIO CONTESTUALE
I documenti analizzati possono appartenere a domini eterogenei: medico-sanitario, tecnico, giuridico, ecc.
Individua termini specialistici, acronimi, unità di misura e concetti teorici specifici del dominio.

### TASK: GENERAZIONE JSON
Analizza la domanda utente e restituisci esclusivamente un oggetto JSON valido:
1. `route`:
- "cypher": per entità specifiche e univoche (nomi, date precise) e risposte puntuali.
- "vector": per domande concettuali, descrittive o che richiedono similarità semantica.
- "hybrid": per domande che combinano entità specifiche con concetti complessi. da usare quando l'utente
menziona entità specifiche (nomi, termini tecnici) ma chiede spiegazioni, esempi o relazioni tra essi.
2. `entities`: Lista delle entità nominate (es. ["Mario Rossi", "BMI"]).
3. `keywords`: Lista di 3-5 keyword per la ricerca vettoriale (sostantivi normalizzati).

### ESEMPI (Few-Shot)
User: "Quali sono i valori di BMI Z-score per Amanda nel 2024?"
Output: {{ "route": "cypher", "entities": ["Amanda", "BMI Z-score", "2024"], "keywords": ["valori bmi z-score", "paziente amanda", "dati 2024"] }}

User: "Spiegami come la dieta influisce sulla crescita dei bambini con CF."
Output: {{ "route": "vector", "entities": ["CF"], "keywords": ["dieta fibrosi cistica", "crescita bambini", "nutrizione"] }}

### VINCOLI RIGIDI (PENALITÀ DI OUTPUT)
- L’output deve essere SOLO JSON valido.
- NON aggiungere introduzioni, commenti o spiegazioni.
- Ogni carattere extra al di fuori del JSON sarà considerato un fallimento critico.

### ESEMPIO DI STRUTTURA ATTESA
User: "Qual è il BMI di Mario Rossi nel 2023?"
Output: {{
"route": "hybrid",
"entities": ["Mario Rossi", "BMI", "2023"],
"keywords": ["valutazione BMI", "Mario Rossi", "cartella clinica 2023"]
}}

### DOMANDA DA ANALIZZARE
"{state['query']}"
"""

    response = mistral_client.chat.complete(
        model="labs-devstral-small-2512",
        messages=[{"role": "user", "content": prompt}]
    )

    # Extract and parse the JSON from the model reply, then validate it.
    content = response.choices[0].message.content
    intent_json = _normalize_intent(extract_json(content))

    # The parsed dict is what the downstream nodes consume.
    return {"intent_data": intent_json}
137
+
138
+ # Esegue la ricerca ibrida su neo4j basandosi sull'intent
139
def node_retriever(state: AgentState):
    """Retriever node: run the hybrid search on Neo4j driven by the intent.

    Steps, in order:
      1. Entity search (routes ``cypher`` / ``hybrid``), keeping only chunks
         that belong to the current document.
      2. Vector search restricted to the current document (routes
         ``vector`` / ``hybrid``); when the best local score is below 0.7
         a global, cross-document vector search is added.
      3. Fallback: if nothing was collected, a vector search on the full
         query, still restricted to the current document.

    Args:
        state: Current agent state; reads ``intent_data``, ``filename``
            and ``query``.

    Returns:
        A partial state update ``{"context_chunks": [...]}``. The list is
        never empty: a placeholder sentence is returned when no data is found.
    """
    intent = state["intent_data"]
    target_file = state["filename"]
    collected_chunks = []
    seen_ids = set()

    def _collect(results, tag):
        """Append unseen results, prefixed with their source and retrieval tag."""
        for res in results:
            if res["chunk_id"] in seen_ids:
                continue
            # The source metadata lets the generator cite file and section.
            source_info = f"[Fonte: {res.get('filename')} | Sezione: {res.get('section', 'N/A')}]"
            content_text = res.get("node_content", "")
            collected_chunks.append(f"{source_info} [{tag}] {content_text}")
            seen_ids.add(res["chunk_id"])

    db = GraphDB()
    try:
        # Print the intent to monitor the router decisions in real time.
        print(f"DEBUG - Intent ricevuto: {intent}")

        # 1. ENTITY SEARCH (CYPHER STRATEGY)
        if intent.get("route") in ["cypher", "hybrid"]:
            for entity in intent.get("entities") or []:
                # The router may emit plain strings or {"value": ...} objects.
                entity_name = entity.get("value") if isinstance(entity, dict) else entity
                if not entity_name:
                    continue

                # entity_search is global, so results are filtered by file here.
                for res in db.entity_search(entity_name):
                    if res["chunk_id"] not in seen_ids and res.get("filename") == target_file:
                        content_text = res.get("node_content", "")
                        collected_chunks.append(f"[Entity Match: {entity_name}] {content_text}")
                        seen_ids.add(res["chunk_id"])

        # 2. VECTOR SEARCH (SEMANTIC STRATEGY)
        if intent.get("route") in ["vector", "hybrid"]:
            keywords = intent.get("keywords") or []
            # str() guards against non-string keywords in the model output.
            search_query = " ".join(str(kw) for kw in keywords) if keywords else state["query"]

            # Reuse the instance loaded at server start-up.
            embedding = indexer_instance.generate_embeddings(search_query)

            # Passing the filename enables the document filter in the query.
            vector_results = db.query_vector_index(
                "chunk_embeddings_index",
                embedding,
                k=15,
                filename=target_file
            )

            # The best local score decides whether the global search is needed.
            max_local_score = vector_results[0]["score"] if vector_results else 0
            print(f"DEBUG - Risultati vettoriali trovati per {target_file}: {len(vector_results)} (Max Score: {max_local_score})")

            _collect(vector_results, "Vector Match")

            # Low local relevance (< 0.7): search the whole database as well.
            if max_local_score < 0.7:
                print(f"DEBUG - Score locale basso ({max_local_score}), attivo Global Vector Search...")

                global_results = db.query_vector_index(
                    "chunk_embeddings_index",
                    embedding,
                    k=5,
                    filename=None
                )
                # Duplicates already seen in the local search are skipped.
                _collect(global_results, "Global Vector Match")

        # 3. FALLBACK: vector search on the full query, current file only.
        if not collected_chunks:
            print(f"DEBUG - Fallback: nessuna informazione con keyword in {target_file}, procedo con query completa.")
            embedding_fallback = indexer_instance.generate_embeddings(state["query"])

            fallback_results = db.query_vector_index(
                "chunk_embeddings_index",
                embedding_fallback,
                k=3,
                filename=target_file
            )
            _collect(fallback_results, "Fallback Match")
    finally:
        # Release the Neo4j connection even when a search step raises.
        db.close()

    # Explicit placeholder so the generator never receives an empty context.
    if not collected_chunks:
        collected_chunks = [f"Nessuna informazione specifica trovata nel database per il file {target_file}."]

    print(f"DEBUG - RETRIEVER sta inviando allo stato {len(collected_chunks)} chunk")

    return {"context_chunks": collected_chunks}
245
+
246
+ #Nodo reranker, ottiene i 15 chunks più pertinenti dal retriever e si occupa di prendere i 5 veramente più pertinenti rispetto alla domanda dell'utente
247
def node_reranker(state: AgentState):
    """Reranker node: keep only the chunks most relevant to the query.

    Reranking runs whenever more than five chunks are available,
    regardless of the route chosen by the router.

    Args:
        state: Current agent state; reads ``query`` and ``context_chunks``.

    Returns:
        A partial state update ``{"context_chunks": [...]}``: the input
        chunks unchanged when there are at most five, otherwise the top
        five selected by the reranker model.
    """
    top_n = 5
    query = state["query"]
    # A missing or None chunk list is treated as empty.
    chunks = state.get("context_chunks") or []

    # Nothing to prune: skip the model call entirely.
    if len(chunks) <= top_n:
        return {"context_chunks": chunks}

    print(f"DEBUG Reranker: Analizzo {len(chunks)} chunk...")

    # The source comment names the reranking model as BGE-Reranker-v2-m3.
    refined_chunks = reranker_model.rerank(query, chunks, top_n=top_n)
    print(f"DEBUG Reranker: Ho selezionato i {len(refined_chunks)} migliori.")
    return {"context_chunks": refined_chunks}
263
+
264
+ #Nodo finale: uso Llama per la risposta
265
def _detect_retrieval_approach(chunks):
    """Derive the retrieval approach label from the tags found in the chunks."""
    vector_tags = ("[Vector Match]", "[Global Vector Match]", "[Fallback Match]")
    has_vector = any(tag in chunk for chunk in chunks for tag in vector_tags)
    # Entity chunks are tagged "[Entity Match: <name>]", so match the prefix.
    has_entity = any("[Entity Match" in chunk for chunk in chunks)

    if has_vector and has_entity:
        return "Hybrid"
    if has_vector:
        return "Vector Match"
    if has_entity:
        return "Entity Match"
    return "Non applicabile"


def node_generator(state: AgentState):
    """Generator node: produce the final answer with Llama (via Groq).

    The retrieved chunks are joined into a context block and the model is
    instructed to answer strictly from it. If the model omits the
    "Approccio di recupero:" footer, it is appended here using the approach
    inferred from the chunk tags.

    Args:
        state: Current agent state; reads ``context_chunks``, ``filename``
            and ``query``.

    Returns:
        A partial state update ``{"final_answer": <answer text>}``.
    """
    chunks = state.get('context_chunks', [])
    print(f"DEBUG - Numero di chunk passati al generatore: {len(chunks)}")

    context = "\n\n".join(chunks)

    if not context.strip():
        print("DEBUG - ATTENZIONE: Il contesto finale per l'LLM è vuoto!!!")

    # The approach is derived from the tags actually present in the chunks.
    approach = _detect_retrieval_approach(chunks)

    prompt = f"""
### ROLE
Sei un assistente virtuale altamente specializzato nell’analisi di documenti medici e tecnico-scientifici.
Il tuo compito è generare risposte **accurate, verificabili ed evidence-based**, evitando qualsiasi forma di inferenza non supportata dalle fonti fornite.

---

### CONTESTO FORNITO (FONTI VERIFICATE)
Di seguito sono riportati uno o più frammenti di documenti (chunk) estratti dal database.
Ogni frammento è preceduto dall’etichetta del metodo di recupero:
- [Vector Match]: recupero per similarità semantica
- [Entity Match]: recupero basato su entità esplicite

Usa **solo ed esclusivamente** le informazioni contenute in questi frammenti.

---------------------
{context}
---------------------

### ISTRUZIONI DI CITAZIONE OBBLIGATORIE
1. Se utilizzi informazioni provenienti da un file diverso da '{state['filename']}',
devi indicare esplicitamente a fine risposta da quale file e sezione hai tratto l'integrazione.
2. Usa il formato: "Fonti esterne utilizzate: [Nome File] (Sezione)".

---

### ISTRUZIONI DI GENERAZIONE (OBBLIGATORIE)

1. **Aderenza totale alle fonti**
- Rispondi alla domanda utilizzando **unicamente** le informazioni presenti nel contesto fornito.
- Non introdurre conoscenze esterne, linee guida generali o interpretazioni personali.

2. **Gestione dell’informazione insufficiente**
- Se il contesto non contiene dati sufficienti, completi o direttamente pertinenti per rispondere alla domanda,
devi rispondere **esattamente** con la seguente frase (senza aggiunte):
> "Mi dispiace, ma il documento fornito non contiene informazioni sufficienti per rispondere a questa domanda."

3. **Divieto assoluto di allucinazioni**
- Non dedurre, stimare, generalizzare o “completare” informazioni mancanti.
- Se un dato non è esplicitamente presente nei chunk, consideralo **inesistente**.

4. **Stile e chiarezza**
- Usa un linguaggio:
- tecnico ma chiaro
- neutro e professionale
- privo di opinioni o giudizi
- Struttura la risposta in modo leggibile:
- testo discorsivo breve
- elenchi puntati solo se migliorano la chiarezza
- Evita ridondanze e frasi speculative.

---

### STRUTTURA OBBLIGATORIA DELLA RISPOSTA

- Inizia DIRETTAMENTE con la risposta.
- Inserisci **esattamente** un separatore orizzontale:
---
- Dopo il separatore, scrivi **su una nuova riga**:
**Approccio di recupero:** {{approccio_utilizzato}}

Dove `approccio_utilizzato` deve essere uno tra:
- Vector Match
- Entity Match
- Hybrid (se entrambi sono stati utilizzati nel contesto)

---

### DOMANDA DELL’UTENTE
"{state['query']}"
"""

    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": "Sei un sintetizzatore di documenti PDF. Rispondi in lingua italiana. Se ti viene posta qualsiasi altra domanda o "
             "istruzione fuori dal tuo scopo di sintetizzatore di documenti PDF, rispondi che non puoi rispondere in quanto la domanda non è pertinente"},
            {"role": "user", "content": prompt}
        ],
        temperature=0.3
    )

    # The content may be None; normalise it so the footer check cannot crash.
    answer = completion.choices[0].message.content or ""

    # Append the footer when the model did not generate it.
    if "Approccio di recupero:" not in answer:
        answer += f"\n\n---\n**Approccio di recupero:** {approach}"

    return {"final_answer": answer}
agentLogic/state.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Questo file definisce lo schema dei dati che passano tra i nodi
2
+
3
+ from typing import TypedDict, List, Annotated
4
+ import operator
5
+
6
class AgentState(TypedDict):
    """Schema of the data passed between the nodes of the agent graph."""

    # Question asked by the user.
    query: str
    # Identifier of the user issuing the request.
    user_id: str
    # Name of the document the question refers to.
    filename: str
    # Router (Mistral) output: route, entities, keywords.
    intent_data: dict
    # Text chunks retrieved as context for the answer.
    context_chunks: List[str]
    # Answer produced by the generator node.
    final_answer: str
13
+
api.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from agentLogic.graph import app as rag_app
4
+ from processingPdf.indexer import Indexer
5
+ import shutil
6
+ import os
7
+ import logging
8
+
9
+ # Configurazione logging
10
# Logging configuration: INFO level on the root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# CORS middleware for the React frontend; only the local dev origin
# (http://localhost:5173) is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Indexer initialisation (per the original note, this loads the embedding
# models when the server starts).
indexer_worker = Indexer()
26
+
27
@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...), user_id: str = Form(...)):
    """Upload a PDF and index it in Neo4j.

    The file is stored in a temporary folder, passed to the indexing
    pipeline and always removed afterwards.

    Args:
        file: Uploaded PDF.
        user_id: Identifier of the uploading user (form field).

    Returns:
        A JSON object with ``status``, ``message`` and the stored ``filename``.

    Raises:
        HTTPException: 400 when the upload has no usable file name,
            500 when saving or indexing fails.
    """
    # The file name is client-controlled: keep only its last path component
    # so values such as "../../x" cannot escape the upload folder.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if not safe_name or safe_name in (".", ".."):
        raise HTTPException(status_code=400, detail="Nome file non valido.")

    # Temporary folder used while the file is processed.
    upload_dir = "temp_uploads"
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, safe_name)

    try:
        # 1. Save the file on the server.
        logger.info("Ricezione file: %s per l'utente: %s", safe_name, user_id)
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # 2. Run the indexing pipeline (Extractor -> Chunker -> Neo4j).
        indexer_worker.index_pdf(file_path, user_id)

        return {
            "status": "success",
            "message": "Indicizzazione completata con successo",
            "filename": safe_name
        }

    except Exception as e:
        # logger.exception keeps the traceback in the server log.
        logger.exception("Errore durante l'upload/indicizzazione: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

    finally:
        # 3. Remove the temporary file.
        if os.path.exists(file_path):
            os.remove(file_path)
60
+
61
@app.post("/chat")
async def chat(query: str, filename: str, user_id: str):
    """Query the GraphRAG system through the LangGraph workflow.

    Args:
        query: Question asked by the user.
        filename: Name of the document the question refers to.
        user_id: Identifier of the user.

    Returns:
        A JSON object ``{"answer": <final answer>}``.

    Raises:
        HTTPException: 400 when the question is blank, 500 when the
            workflow fails.
    """
    # Reject blank questions before running the LLM pipeline. This check sits
    # outside the try block so the 400 is not converted into a 500.
    if not query.strip():
        raise HTTPException(status_code=400, detail="La domanda non può essere vuota.")

    # Initial state for the graph.
    initial_state = {
        "query": query,
        "user_id": user_id,
        "filename": filename,
        "intent_data": {},
        "context_chunks": [],
        "final_answer": ""
    }

    try:
        # Run the workflow.
        result = rag_app.invoke(initial_state)
        return {"answer": result["final_answer"]}

    except Exception as e:
        # Full traceback in the log, generic message to the client.
        logger.exception("Errore nella chat endpoint: %s", e)
        raise HTTPException(status_code=500, detail="Errore durante l'elaborazione della domanda.") from e
85
+
86
if __name__ == "__main__":
    # Entry point when the file is run as a script: serve api:app with
    # uvicorn on 127.0.0.1:8000 with auto-reload enabled.
    import uvicorn
    uvicorn.run("api:app", host="127.0.0.1", port=8000, reload=True)
db/graph_db.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Questo file gestisce la connessione e tutte le query Cypher
2
+
3
+ from neo4j import GraphDatabase, exceptions
4
+ import logging
5
+ import os
6
+ from typing import List, Dict, Any, Optional
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ logging.basicConfig(level=logging.INFO)
11
+
12
+ #Classe per la gestione della connessione e delle operazioni di base con Neo4j
13
class GraphDB:
    """Connection handling and Cypher queries for the Neo4j graph.

    The instance owns a Neo4j driver. Call :meth:`close` when done, or use
    the object as a context manager (``with GraphDB() as db: ...``).
    """

    def __init__(self, uri: Optional[str] = None, user: Optional[str] = None, password: Optional[str] = None, database: Optional[str] = None):
        """Connect to Neo4j and make sure indexes and constraints exist.

        Each argument falls back to its environment variable (``NEO4J_URI``,
        ``NEO4J_USERNAME``, ``NEO4J_PASSWORD``, ``NEO4J_DATABASE``).

        Raises:
            ValueError: If any of the four settings is missing.
            Exception: Any connection or setup error is logged and re-raised.
        """
        self.uri = uri or os.getenv("NEO4J_URI")
        self.user = user or os.getenv("NEO4J_USERNAME")
        self.password = password or os.getenv("NEO4J_PASSWORD")
        self.database = database or os.getenv("NEO4J_DATABASE")

        # Validation: every setting is mandatory.
        missing_vars = [name for name, val in [
            ("NEO4J_URI", self.uri),
            ("NEO4J_USERNAME", self.user),
            ("NEO4J_PASSWORD", self.password),
            ("NEO4J_DATABASE", self.database)
        ] if not val]
        if missing_vars:
            raise ValueError(
                f"Credenziali Neo4j mancanti. Assicurati che le seguenti variabili siano definite nel file .env e caricate correttamente: {', '.join(missing_vars)}"
            )

        self.driver = None

        try:
            self.driver = GraphDatabase.driver(self.uri, auth=(self.user, self.password))
            self.driver.verify_connectivity()

            self.create_indexes_and_constraints()
            logger.info(f"Connessione a Neo4j (DB: {self.database}) stabilita con successo.")
        except Exception as e:
            logger.error(f"Errore durante la connessione a Neo4j su {self.uri}: {e}")
            # Do not leak a half-initialised driver when setup fails.
            if self.driver is not None:
                self.driver.close()
                self.driver = None
            raise

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on exit; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the Neo4j driver; safe to call more than once."""
        if self.driver:
            self.driver.close()
            self.driver = None
            logger.info("Connessione a Neo4j chiusa.")

    def create_indexes_and_constraints(self):
        """Create the indexes and constraints the RAG queries rely on."""
        index_queries = [
            # Uniqueness constraints for the main nodes (Document, User).
            "CREATE CONSTRAINT IF NOT EXISTS FOR (d:Document) REQUIRE d.filename IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (u:User) REQUIRE u.id IS UNIQUE",
            # Index for looking up a Chunk by id (useful for chunk citation).
            "CREATE INDEX IF NOT EXISTS FOR (c:Chunk) ON (c.chunk_id)",
        ]

        with self.driver.session(database=self.database) as session:
            for query in index_queries:
                try:
                    session.run(query)
                except exceptions.ClientError as e:
                    # Ignore the known "already exists" errors, re-raise the rest.
                    if "IndexAlreadyExists" not in e.message and "ConstraintAlreadyExists" not in e.message:
                        logger.error(f"Errore nell'esecuzione della query indice '{query}': {e}")
                        raise
                except Exception as e:
                    logger.error(f"Errore inatteso nell'esecuzione della query indice '{query}': {e}")
                    raise

        logger.info("Indici e vincoli Neo4j verificati/creati.")

    # --- CRUD operations for the RAG ---

    def create_document_node(self, filename: str, title: Optional[str] = None):
        """Create or update a Document node; the title defaults to the filename."""
        query = """
        MERGE (d:Document {filename: $filename})
        ON CREATE SET
        d.created_at = datetime(),
        d.title = COALESCE($title, $filename)
        ON MATCH SET d.last_updated = datetime()
        RETURN d
        """
        return self.run_query(query, {"filename": filename, "title": title})

    def create_user_node(self, user_id: str):
        """Create or update a User node and record its last activity."""
        query = """
        MERGE (u:User {id: $user_id})
        ON CREATE SET u.created_at = datetime(), u.last_activity = datetime()
        ON MATCH SET u.last_activity = datetime()
        RETURN u
        """
        return self.run_query(query, {"user_id": user_id})

    def link_user_to_document(self, user_id: str, filename: str):
        """Create or refresh the ACCESSED relationship between User and Document."""
        query = """
        MATCH (u:User {id: $user_id})
        MATCH (d:Document {filename: $filename})
        MERGE (u)-[r:ACCESSED]->(d)
        ON CREATE SET r.first_access = datetime(), r.last_access = datetime()
        ON MATCH SET r.last_access = datetime()
        RETURN u, d, r
        """
        return self.run_query(query, {"user_id": user_id, "filename": filename})

    def add_chunk_to_document(self, filename: str, chunk_id: str, content: str, embedding: List[float], metadata: Dict[str, Any]):
        """Create or update a Chunk node and attach it to its Document.

        Only ``metadata["section"]`` is stored (default ``"unspecified"``).
        """
        query = """
        MATCH (d:Document {filename: $filename})
        MERGE (c:Chunk {chunk_id: $chunk_id})
        SET c.content = $content,
        c.embedding = $embedding,
        c.section = $section,
        c.source = $filename,
        c.last_updated = datetime()
        MERGE (d)-[:HAS_CHUNK]->(c)
        RETURN c
        """
        parameters = {
            "filename": filename,
            "chunk_id": chunk_id,
            "content": content,
            "embedding": embedding,
            "section": metadata.get("section", "unspecified")
        }
        return self.run_query(query, parameters)

    def create_vector_index(self, index_name: str, node_label: str, property_name: str, vector_dimensions: int):
        """Create a cosine-similarity vector index if it does not exist.

        NOTE: ``index_name``, ``node_label`` and ``property_name`` are
        interpolated into the Cypher text, so they must come from trusted code.

        Raises:
            Exception: Any error from the query is logged and re-raised.
        """
        query = f"""
        CREATE VECTOR INDEX {index_name} IF NOT EXISTS
        FOR (n:{node_label})
        ON (n.{property_name})
        OPTIONS {{
        indexConfig: {{
        `vector.dimensions`: {vector_dimensions},
        `vector.similarity_function`: 'cosine'
        }}
        }}
        """
        try:
            self.run_query(query)
            logger.info(f"Indice vettoriale '{index_name}' creato con successo per {node_label}.")
        except Exception as e:
            logger.error(f"Errore nella creazione dell'indice vettoriale '{index_name}': {e}")
            raise

    def query_vector_index(self, index_name: str, query_embedding: List[float], k: int = 5, filename: Optional[str] = None) -> List[Dict[str, Any]]:
        """Run a vector similarity search, optionally limited to one document.

        NOTE: with ``filename`` set, the index still returns the global top-k
        and the document filter is applied afterwards, so fewer than ``k``
        rows may come back.

        Returns:
            A list of dicts with ``node_content``, ``score``, ``chunk_id``,
            ``section`` and ``filename``, best score first. An empty list is
            returned (and the error logged) when the query fails.
        """
        # The index name is passed as a parameter, not interpolated into Cypher.
        parameters = {"index_name": index_name, "query_embedding": query_embedding, "k": k}

        if filename:
            # Search limited to one document (more precise).
            query = """
            CALL db.index.vector.queryNodes($index_name, $k, $query_embedding)
            YIELD node, score
            WITH node, score
            MATCH (d:Document {filename: $filename})-[:HAS_CHUNK]->(node)
            RETURN node.content AS node_content, score, node.chunk_id AS chunk_id, node.section AS section, d.filename AS filename
            ORDER BY score DESC
            """
            parameters["filename"] = filename
        else:
            # Global search over every document (cross-document search).
            query = """
            CALL db.index.vector.queryNodes($index_name, $k, $query_embedding)
            YIELD node, score
            RETURN node.content AS node_content, score, node.chunk_id AS chunk_id, node.section AS section, node.source AS filename
            ORDER BY score DESC
            """

        try:
            records = self.run_query(query, parameters)
            # The keys are always present in the rows, so null values are
            # replaced with "or" rather than a .get() default.
            results = [
                {
                    "node_content": record["node_content"],
                    "score": record["score"],
                    "chunk_id": record["chunk_id"],
                    "section": record.get("section") or "N/A",
                    "filename": record.get("filename") or "Unknown",
                }
                for record in records
            ]
            logger.debug(f"Ricerca vettoriale ha trovato {len(results)} risultati.")
            return results
        except Exception as e:
            logger.error(f"Errore durante la query dell'indice vettoriale: {e}")
            return []

    def add_entity_to_chunk(self, entity_name, entity_type, chunk_id):
        """Create the Entity node if needed and link it to the given Chunk."""
        query = """
        MERGE (e:Entity {name: $name, type: $type})
        WITH e
        MATCH (c:Chunk {chunk_id: $chunk_id})
        MERGE (c)-[:CONTAINS_ENTITY]->(e)
        """
        params = {"name": entity_name, "type": entity_type, "chunk_id": chunk_id}
        self.run_query(query, params)

    def entity_search(self, entity_name: str) -> List[Dict[str, Any]]:
        """Find chunks linked to an Entity whose name matches, ignoring case.

        At most five chunks are returned, across all documents, each with a
        fixed score of 1.0.

        Returns:
            A list of dicts with ``node_content``, ``chunk_id``, ``score``,
            ``section`` and ``filename``. An empty list is returned (and the
            error logged) when the query fails.
        """
        query = """
        MATCH (e:Entity)
        WHERE toLower(e.name) = toLower($name)
        MATCH (e)<-[:CONTAINS_ENTITY]-(c:Chunk)
        RETURN c.content AS node_content, c.chunk_id AS chunk_id, 1.0 AS score, c.section AS section, c.source AS filename
        LIMIT 5
        """
        wanted_keys = ("node_content", "chunk_id", "score", "section", "filename")
        try:
            records = self.run_query(query, {"name": entity_name})
            return [{key: record.get(key) for key in wanted_keys} for record in records]
        except Exception as e:
            logger.error(f"Errore nella ricerca per entità '{entity_name}': {e}")
            return []

    def run_query(self, query: str, parameters: Optional[Dict[str, Any]] = None):
        """Run a Cypher query in a fresh session and return the rows as dicts.

        Raises:
            RuntimeError: If the driver is not initialised or was closed.
        """
        if not self.driver:
            raise RuntimeError("Driver Neo4j non inizializzato.")

        with self.driver.session(database=self.database) as session:
            result = session.run(query, parameters)
            return result.data()
frontend/.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules
11
+ dist
12
+ dist-ssr
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/*
17
+ !.vscode/extensions.json
18
+ .idea
19
+ .DS_Store
20
+ *.suo
21
+ *.ntvs*
22
+ *.njsproj
23
+ *.sln
24
+ *.sw?
frontend/README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # React + TypeScript + Vite
2
+
3
+ This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
4
+
5
+ Currently, two official plugins are available:
6
+
7
+ - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
8
+ - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
9
+
10
+ ## React Compiler
11
+
12
+ The React Compiler is not enabled on this template because of its impact on dev & build performance. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
13
+
14
+ ## Expanding the ESLint configuration
15
+
16
+ If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
17
+
18
+ ```js
19
+ export default defineConfig([
20
+ globalIgnores(['dist']),
21
+ {
22
+ files: ['**/*.{ts,tsx}'],
23
+ extends: [
24
+ // Other configs...
25
+
26
+ // Remove tseslint.configs.recommended and replace with this
27
+ tseslint.configs.recommendedTypeChecked,
28
+ // Alternatively, use this for stricter rules
29
+ tseslint.configs.strictTypeChecked,
30
+ // Optionally, add this for stylistic rules
31
+ tseslint.configs.stylisticTypeChecked,
32
+
33
+ // Other configs...
34
+ ],
35
+ languageOptions: {
36
+ parserOptions: {
37
+ project: ['./tsconfig.node.json', './tsconfig.app.json'],
38
+ tsconfigRootDir: import.meta.dirname,
39
+ },
40
+ // other options...
41
+ },
42
+ },
43
+ ])
44
+ ```
45
+
46
+ You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
47
+
48
+ ```js
49
+ // eslint.config.js
50
+ import reactX from 'eslint-plugin-react-x'
51
+ import reactDom from 'eslint-plugin-react-dom'
52
+
53
+ export default defineConfig([
54
+ globalIgnores(['dist']),
55
+ {
56
+ files: ['**/*.{ts,tsx}'],
57
+ extends: [
58
+ // Other configs...
59
+ // Enable lint rules for React
60
+ reactX.configs['recommended-typescript'],
61
+ // Enable lint rules for React DOM
62
+ reactDom.configs.recommended,
63
+ ],
64
+ languageOptions: {
65
+ parserOptions: {
66
+ project: ['./tsconfig.node.json', './tsconfig.app.json'],
67
+ tsconfigRootDir: import.meta.dirname,
68
+ },
69
+ // other options...
70
+ },
71
+ },
72
+ ])
73
+ ```
frontend/eslint.config.js ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from '@eslint/js'
2
+ import globals from 'globals'
3
+ import reactHooks from 'eslint-plugin-react-hooks'
4
+ import reactRefresh from 'eslint-plugin-react-refresh'
5
+ import tseslint from 'typescript-eslint'
6
+ import { defineConfig, globalIgnores } from 'eslint/config'
7
+
8
+ export default defineConfig([
9
+ globalIgnores(['dist']),
10
+ {
11
+ files: ['**/*.{ts,tsx}'],
12
+ extends: [
13
+ js.configs.recommended,
14
+ tseslint.configs.recommended,
15
+ reactHooks.configs.flat.recommended,
16
+ reactRefresh.configs.vite,
17
+ ],
18
+ languageOptions: {
19
+ ecmaVersion: 2020,
20
+ globals: globals.browser,
21
+ },
22
+ },
23
+ ])
frontend/index.html ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="it">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>RAG Chatbot - Il Tuo Assistente PDF Intelligente</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@400;600;800&display=swap" rel="stylesheet">
11
+ </head>
12
+ <body>
13
+ <div id="root"></div>
14
+ <script type="module" src="/src/main.tsx"></script>
15
+ </body>
16
+ </html>
frontend/metadata.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "name": "RAG Chatbot Assistant",
3
+ "description": "A modern, high-performance landing page and application for intelligent PDF interaction. Upload your documents and chat with an AI expert indexed specifically on your content.",
4
+ "requestFramePermissions": []
5
+ }
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "frontend",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "tsc -b && vite build",
9
+ "lint": "eslint .",
10
+ "preview": "vite preview"
11
+ },
12
+ "dependencies": {
13
+ "lucide-react": "^0.562.0",
14
+ "react": "^19.2.0",
15
+ "react-dom": "^19.2.0"
16
+ },
17
+ "devDependencies": {
18
+ "@eslint/js": "^9.39.1",
19
+ "@tailwindcss/postcss": "^4.1.18",
20
+ "@types/node": "^24.10.1",
21
+ "@types/react": "^19.2.5",
22
+ "@types/react-dom": "^19.2.3",
23
+ "@vitejs/plugin-react": "^5.1.1",
24
+ "autoprefixer": "^10.4.23",
25
+ "eslint": "^9.39.1",
26
+ "eslint-plugin-react-hooks": "^7.0.1",
27
+ "eslint-plugin-react-refresh": "^0.4.24",
28
+ "globals": "^16.5.0",
29
+ "postcss": "^8.5.6",
30
+ "tailwindcss": "^4.1.18",
31
+ "typescript": "~5.9.3",
32
+ "typescript-eslint": "^8.46.4",
33
+ "vite": "^7.2.4"
34
+ }
35
+ }
frontend/postcss.config.cjs ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ module.exports = {
2
+ plugins: {
3
+ '@tailwindcss/postcss': {},
4
+ autoprefixer: {},
5
+ },
6
+ }
frontend/public/vite.svg ADDED
frontend/src/App.tsx ADDED
@@ -0,0 +1,584 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect, useRef } from 'react';
2
+ import type { Message, UserData } from './types';
3
+ import { AppStep } from './types';
4
+ import { IconFileUp, IconSend, IconBot, IconGear, IconSparkles, IconArrowRight, IconLayers, IconDatabase, IconWorkflow, IconGlobe } from './components/Icons';
5
+ import ReactMarkdown from 'react-markdown';
6
+
7
+ const App: React.FC = () => {
8
+ const [step, setStep] = useState<AppStep>(AppStep.NAME_INPUT);
9
+ const [previousStep, setPreviousStep] = useState<AppStep | null>(null);
10
+ const [userData, setUserData] = useState<UserData>({ name: '' });
11
+ const [progress, setProgress] = useState(0);
12
+ const [messages, setMessages] = useState<Message[]>([]);
13
+ const [inputValue, setInputValue] = useState('');
14
+ const [isTyping, setIsTyping] = useState(false);
15
+ const [showPlusMenu, setShowPlusMenu] = useState(false);
16
+ const chatEndRef = useRef<HTMLDivElement>(null);
17
+
18
+ const scrollToBottom = () => {
19
+ chatEndRef.current?.scrollIntoView({ behavior: 'smooth' });
20
+ };
21
+
22
+ useEffect(() => {
23
+ scrollToBottom();
24
+ }, [messages, isTyping]);
25
+
26
+ const handleNameSubmit = (e: React.FormEvent) => {
27
+ e.preventDefault();
28
+ if (userData.name.trim()) {
29
+ setStep(AppStep.PDF_UPLOAD);
30
+ }
31
+ };
32
+
33
+ const handleFileUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
34
+ const file = e.target.files?.[0];
35
+ if (file) {
36
+ setUserData({ ...userData, fileName: file.name });
37
+ setStep(AppStep.PROCESSING);
38
+ setProgress(10);
39
+
40
+ const formData = new FormData();
41
+ formData.append('file', file);
42
+ formData.append('user_id', userData.name);
43
+
44
+ // Incremento simulato della barra per dare feedback visuale durante l'elaborazione pesante
45
+ const progressInterval = setInterval(() => {
46
+ setProgress(prev => (prev < 90 ? prev + 5 : prev));
47
+ }, 800);
48
+
49
+ try {
50
+ const response = await fetch('http://127.0.0.1:8000/upload', {
51
+ method: 'POST',
52
+ body: formData,
53
+ });
54
+
55
+ clearInterval(progressInterval);
56
+
57
+ if (response.ok) {
58
+ setProgress(100);
59
+ setTimeout(() => {
60
+ setStep(AppStep.CHAT);
61
+ setMessages([{
62
+ role: 'ai',
63
+ content: 'PRESET_WELCOME',
64
+ timestamp: new Date()
65
+ }]);
66
+ }, 600);
67
+ } else {
68
+ throw new Error("Errore durante l'elaborazione del server");
69
+ }
70
+ } catch (error) {
71
+ clearInterval(progressInterval);
72
+ console.error("Errore durante l'ingestione:", error);
73
+ alert("Errore nel caricamento del file. Assicurati che il backend sia attivo e Neo4j sia raggiungibile.");
74
+ setStep(AppStep.PDF_UPLOAD);
75
+ }
76
+ }
77
+ };
78
+
79
+ const truncateFileName = (name: string, limit = 25) => {
80
+ if (name.length <= limit) return name;
81
+ return name.substring(0, limit) + '...';
82
+ };
83
+
84
+ const handleHomeClick = () => {
85
+ setStep(AppStep.NAME_INPUT);
86
+ setUserData({ name: '' });
87
+ setMessages([]);
88
+ setProgress(0);
89
+ };
90
+
91
+ const handleHowItWorksClick = () => {
92
+ setPreviousStep(step);
93
+ setStep(AppStep.HOW_IT_WORKS);
94
+ };
95
+
96
+ const returnFromHowItWorks = () => {
97
+ if (previousStep) {
98
+ setStep(previousStep);
99
+ } else {
100
+ setStep(AppStep.NAME_INPUT);
101
+ }
102
+ };
103
+
104
+ const handleSendMessage = async (e: React.FormEvent) => {
105
+ e.preventDefault();
106
+ if (!inputValue.trim() || isTyping) return;
107
+
108
+ const userMsg: Message = {
109
+ role: 'user',
110
+ content: inputValue,
111
+ timestamp: new Date()
112
+ };
113
+
114
+ setMessages(prev => [...prev, userMsg]);
115
+ setInputValue('');
116
+ setIsTyping(true);
117
+
118
+ try {
119
+ const queryParams = new URLSearchParams({
120
+ query: inputValue,
121
+ filename: userData.fileName || "documento",
122
+ user_id: userData.name
123
+ });
124
+
125
+ const response = await fetch(`http://127.0.0.1:8000/chat?${queryParams}`, {
126
+ method: 'POST',
127
+ headers: { 'Content-Type': 'application/json' }
128
+ });
129
+
130
+ if (!response.ok) throw new Error('Errore nella risposta del server');
131
+
132
+ const data = await response.json();
133
+
134
+ setMessages(prev => [...prev, {
135
+ role: 'ai',
136
+ content: data.answer,
137
+ timestamp: new Date()
138
+ }]);
139
+ } catch (error) {
140
+ console.error("Errore chat:", error);
141
+ setMessages(prev => [...prev, {
142
+ role: 'ai',
143
+ content: "Scusa, si è verificato un errore nel collegamento con il server. Assicurati che il backend sia attivo.",
144
+ timestamp: new Date()
145
+ }]);
146
+ } finally {
147
+ setIsTyping(false);
148
+ }
149
+ };
150
+
151
+ const isInitialPage = step === AppStep.NAME_INPUT || step === AppStep.PDF_UPLOAD || step === AppStep.PROCESSING;
152
+
153
+ return (
154
+ <div className="min-h-screen w-screen flex flex-col font-outfit bg-white overflow-x-hidden">
155
+
156
+ {/* Header */}
157
+ <header className={`glass-header fixed top-0 w-full z-50 py-3 md:py-4 px-4 md:px-12 flex justify-between items-center transition-opacity ${step === AppStep.CHAT || step === AppStep.HOW_IT_WORKS ? 'opacity-30 hover:opacity-100' : 'opacity-100'}`}>
158
+ <div
159
+ onClick={handleHomeClick}
160
+ className="flex items-center gap-2 cursor-pointer group"
161
+ >
162
+ <div className="bg-[#FF6600] text-white p-1.5 md:p-2 rounded-lg group-hover:scale-105 transition-transform flex items-center justify-center">
163
+ <IconBot className="w-6 h-6 md:w-8 h-8" />
164
+ </div>
165
+ <span className="text-xl md:text-2xl font-extrabold tracking-tighter transition-colors group-hover:text-[#FF6600]">RAG Chatbot</span>
166
+ </div>
167
+ <div>
168
+ <button
169
+ onClick={handleHowItWorksClick}
170
+ className="group bg-[#FF6600] text-white w-10 h-10 md:w-auto md:px-4 md:py-2 rounded-lg font-bold text-sm hover:bg-black transition-all active:scale-95 shadow-lg shadow-[#FF6600]/20 flex items-center justify-center"
171
+ >
172
+ <span className="md:hidden text-black font-black text-lg group-hover:text-white transition-colors">?</span>
173
+ <span className="hidden md:inline">Come funziona</span>
174
+ </button>
175
+ </div>
176
+ </header>
177
+
178
+ {/* Main Container */}
179
+ <main className={`flex-grow flex flex-col items-center ${(step === AppStep.CHAT || step === AppStep.HOW_IT_WORKS) ? 'h-screen pt-20' : 'pt-28 md:pt-32'}`}>
180
+
181
+ <div className={`w-full ${(step === AppStep.CHAT || step === AppStep.HOW_IT_WORKS) ? 'h-full flex flex-col' : 'max-w-4xl'}`}>
182
+
183
+ {/* INITIAL STEPS (NAME & UPLOAD) */}
184
+ {(step === AppStep.NAME_INPUT || step === AppStep.PDF_UPLOAD) && (
185
+ <div className="text-center mb-10 md:mb-12 animate-in fade-in slide-in-from-bottom-4 duration-700 px-6">
186
+ <h1 className="text-4xl md:text-6xl font-black mb-4 leading-tight">
187
+ Il Tuo Assistente PDF <br/>
188
+ <span className="text-[#FF6600]">Intelligente</span>
189
+ </h1>
190
+ <h2 className="text-base md:text-xl text-gray-600 font-light max-w-2xl mx-auto mb-10">
191
+ Carica il tuo PDF e lascialo analizzare dal nostro sistema: Poni domande in chat in linguaggio naturale e ottieni risposte mirate basate esclusivamente sul contenuto del file.
192
+ </h2>
193
+
194
+ <div className="flex flex-row items-center justify-center gap-2 md:gap-8 mb-12">
195
+ <div className="flex flex-col items-center gap-3 w-20 md:w-32 group cursor-default">
196
+ <div className={`w-12 h-12 md:w-16 md:h-16 rounded-full border-2 flex items-center justify-center transition-all bg-gray-50 border-gray-100 text-gray-400 group-hover:text-[#FF6600] group-hover:border-[#FF6600]/30 group-hover:bg-[#FF6600]/5`}>
197
+ <IconFileUp className="w-5 h-5 md:w-7 md:h-7" />
198
+ </div>
199
+ <p className={`text-[8px] md:text-[10px] font-bold tracking-widest uppercase transition-colors text-gray-400 group-hover:text-[#FF6600]`}>1. Upload</p>
200
+ </div>
201
+ <div><IconArrowRight className="text-gray-200 w-3 h-3 md:w-5 md:h-5 animate-pulse" /></div>
202
+ <div className="flex flex-col items-center gap-3 w-20 md:w-32 group cursor-default">
203
+ <div className="w-12 h-12 md:w-16 md:h-16 rounded-full bg-gray-50 border-2 border-gray-100 flex items-center justify-center text-gray-400 group-hover:text-[#FF6600] group-hover:border-[#FF6600]/30 group-hover:bg-[#FF6600]/5 transition-all">
204
+ <div className="relative">
205
+ <IconGear className="w-5 h-5 md:w-7 md:h-7 animate-gear-rotate" />
206
+ <span className="absolute -top-1 -right-1 bg-[#FF6600] text-white text-[6px] md:text-[8px] font-bold px-1 rounded-sm">PDF</span>
207
+ </div>
208
+ </div>
209
+ <p className="text-[8px] md:text-[10px] font-bold tracking-widest uppercase text-gray-400 group-hover:text-[#FF6600] transition-colors">2. Analisi</p>
210
+ </div>
211
+ <div><IconArrowRight className="text-gray-200 w-3 h-3 md:w-5 md:h-5 animate-pulse" /></div>
212
+ <div className="flex flex-col items-center gap-3 w-20 md:w-32 group cursor-default">
213
+ <div className="w-12 h-12 md:w-16 md:h-16 rounded-full bg-gray-50 border-2 border-gray-100 flex items-center justify-center text-gray-400 shadow-sm group-hover:text-[#FF6600] group-hover:border-[#FF6600]/30 group-hover:bg-[#FF6600]/5 transition-all">
214
+ <IconSparkles className="w-5 h-5 md:w-7 md:h-7" />
215
+ </div>
216
+ <p className="text-[8px] md:text-[10px] font-bold tracking-widest uppercase text-gray-400 group-hover:text-[#FF6600] transition-colors">3. Chat</p>
217
+ </div>
218
+ </div>
219
+ </div>
220
+ )}
221
+
222
+ {step === AppStep.NAME_INPUT && (
223
+ <div className="flex justify-center py-2 animate-in fade-in slide-in-from-top-4 duration-500 px-6">
224
+ <div className="bg-white border border-black/5 p-8 md:p-10 rounded-[2.5rem] shadow-2xl max-w-md w-full orange-glow transition-all">
225
+ <form onSubmit={handleNameSubmit} className="flex flex-col gap-6 md:gap-8">
226
+ <div>
227
+ <label className="block text-xs font-bold uppercase tracking-[0.2em] text-gray-400 mb-4 ml-2">Inserisci il tuo nome o User ID</label>
228
+ <input
229
+ type="text"
230
+ value={userData.name}
231
+ onChange={(e) => setUserData({...userData, name: e.target.value})}
232
+ placeholder="Es: Laura"
233
+ className="w-full bg-gray-50 border-2 border-transparent focus:border-[#FF6600]/30 focus:bg-white outline-none px-6 md:px-8 py-4 md:py-5 rounded-[1.5rem] text-lg md:text-xl font-semibold text-black placeholder:text-gray-300 transition-all shadow-sm"
234
+ autoFocus
235
+ />
236
+ </div>
237
+ <button type="submit" disabled={!userData.name.trim()} className="bg-[#FF6600] text-white font-bold py-4 md:py-5 rounded-[1.5rem] hover:bg-black shadow-lg shadow-[#FF6600]/20 transition-all transform active:scale-95 disabled:opacity-30">Continua</button>
238
+ </form>
239
+ </div>
240
+ </div>
241
+ )}
242
+
243
+ {step === AppStep.PDF_UPLOAD && (
244
+ <div className="flex flex-col items-center py-2 animate-in fade-in slide-in-from-bottom-8 duration-700 px-6">
245
+ <div className="text-center mb-8">
246
+ <h3 className="text-2xl md:text-3xl font-black text-black mb-2 tracking-tighter uppercase font-outfit">Ciao <span className="text-[#FF6600]">{userData.name}</span></h3>
247
+ <p className="text-gray-400 font-medium">Siamo pronti ad analizzare il tuo file.</p>
248
+ </div>
249
+ <div className="bg-white border-2 border-dashed border-gray-200 p-8 md:p-12 rounded-[3rem] max-w-2xl w-full flex flex-col items-center justify-center gap-6 hover:border-[#FF6600] group transition-all cursor-pointer relative shadow-sm">
250
+ <input type="file" accept="application/pdf" onChange={handleFileUpload} className="absolute inset-0 opacity-0 cursor-pointer" />
251
+ <div className="bg-gray-50 p-4 md:p-6 rounded-full group-hover:bg-[#FF6600]/10 transition-colors">
252
+ <IconFileUp className="w-10 h-10 md:w-12 md:h-12 text-gray-300 group-hover:text-[#FF6600]" />
253
+ </div>
254
+ <div className="text-center">
255
+ <p className="text-lg md:text-xl font-bold mb-1">Carica il documento ed inizia a chattare</p>
256
+ <p className="text-gray-400 text-sm font-medium">Trascina qui il file o clicca per sfogliare</p>
257
+ </div>
258
+ </div>
259
+ </div>
260
+ )}
261
+
262
+ {step === AppStep.PROCESSING && (
263
+ <div className="flex-grow flex flex-col items-center justify-center py-16 gap-8 animate-in fade-in px-6">
264
+ <div className="relative w-20 h-20 md:w-24 md:h-24">
265
+ <div className="absolute inset-0 border-4 border-gray-50 rounded-full"></div>
266
+ <div className="absolute inset-0 border-4 border-t-[#FF6600] rounded-full animate-spin"></div>
267
+ </div>
268
+ <div className="text-center max-md:max-w-xs max-w-md">
269
+ <h3 className="text-xl md:text-2xl font-extrabold mb-2">Stiamo processando il tuo documento...</h3>
270
+ <p className="text-gray-500 mb-8 font-light">Quasi pronto...</p>
271
+ <div className="w-full bg-gray-50 h-2 rounded-full overflow-hidden">
272
+ <div className="bg-[#FF6600] h-full transition-all duration-300" style={{ width: `${progress}%` }}></div>
273
+ </div>
274
+ </div>
275
+ </div>
276
+ )}
277
+
278
+ {/* CHAT STEP */}
279
+ {step === AppStep.CHAT && (
280
+ <div className="flex-grow flex flex-col w-full max-w-3xl mx-auto h-full overflow-hidden">
281
+ <div className="flex-grow overflow-y-auto px-6 py-10 space-y-12 chat-scroll">
282
+ {messages.map((msg, idx) => {
283
+ if (msg.content === 'PRESET_WELCOME') {
284
+ return (
285
+ <div key={idx} className="animate-in slide-in-from-bottom-4 duration-700">
286
+ <div className="flex items-start gap-5">
287
+ <div className="w-12 h-12 rounded-2xl bg-[#FF6600] flex items-center justify-center shadow-lg shadow-[#FF6600]/20 flex-shrink-0">
288
+ <div className="relative flex items-center justify-center w-full h-full">
289
+ <IconBot className="w-9 h-9" />
290
+ <div className="absolute -top-1 -right-1 bg-white text-black text-[7px] font-black px-1 rounded-sm border border-black/5 uppercase">PDF</div>
291
+ </div>
292
+ </div>
293
+ <div className="flex flex-col gap-1 pt-1">
294
+ <h2 className="text-xl font-normal text-gray-500 font-verdana leading-tight">
295
+ <span className="text-[#FF6600] font-medium">{userData.name}</span>, il file{" "}
296
+ <span className="text-black font-medium hover:text-[#FF6600] cursor-pointer transition-colors" title={userData.fileName}>
297
+ "{truncateFileName(userData.fileName || '', 40)}"
298
+ </span>{" "}
299
+ è pronto.
300
+ </h2>
301
+ <p className="text-sm font-light text-gray-400">
302
+ Chiedimi qualunque cosa!
303
+ </p>
304
+ </div>
305
+ </div>
306
+ </div>
307
+ );
308
+ }
309
+
310
+ const isUser = msg.role === 'user';
311
+ return (
312
+ <div
313
+ key={idx}
314
+ className={`flex ${isUser ? 'justify-end' : 'justify-start'} animate-in fade-in duration-300`}
315
+ >
316
+ <div className={`flex gap-4 max-w-[85%] ${isUser ? 'flex-row-reverse' : 'flex-row'}`}>
317
+ {!isUser && (
318
+ <div className="w-10 h-10 rounded-lg bg-gray-50 flex items-center justify-center flex-shrink-0 mt-1">
319
+ <IconBot className="w-7 h-7" />
320
+ </div>
321
+ )}
322
+ <div className={`p-4 ${
323
+ isUser
324
+ ? 'bg-gray-100 text-gray-800 rounded-2xl rounded-tr-none'
325
+ : 'text-gray-700 leading-relaxed text-[15px] markdown-container'
326
+ }`}>
327
+ {/* Rendering Markdown per interpretare il grassetto e altre formattazioni */}
328
+ <div className="font-verdana">
329
+ <ReactMarkdown>
330
+ {msg.content}
331
+ </ReactMarkdown>
332
+ </div>
333
+ <p className="text-[9px] mt-2 opacity-30 font-bold tracking-tighter">
334
+ {msg.timestamp.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}
335
+ </p>
336
+ </div>
337
+ </div>
338
+ </div>
339
+ );
340
+ })}
341
+ {isTyping && (
342
+ <div className="flex justify-start animate-in fade-in">
343
+ <div className="flex gap-4">
344
+ <div className="w-10 h-10 rounded-lg bg-gray-50 flex items-center justify-center">
345
+ <IconBot className="w-7 h-7 opacity-30 animate-pulse" />
346
+ </div>
347
+ <div className="flex gap-1.5 items-center py-2">
348
+ <span className="w-1.5 h-1.5 bg-gray-200 rounded-full animate-bounce"></span>
349
+ <span className="w-1.5 h-1.5 bg-gray-200 rounded-full animate-bounce delay-150"></span>
350
+ <span className="w-1.5 h-1.5 bg-gray-200 rounded-full animate-bounce delay-300"></span>
351
+ </div>
352
+ </div>
353
+ </div>
354
+ )}
355
+ <div ref={chatEndRef} />
356
+ </div>
357
+
358
+ <div className="px-6 pb-10 pt-4 bg-white">
359
+ <form onSubmit={handleSendMessage} className="relative max-w-2xl mx-auto">
360
+ <div className="relative flex items-center bg-gray-50 border border-gray-100 rounded-[2.5rem] p-2 focus-within:bg-white focus-within:ring-2 focus-within:ring-[#FF6600]/10 transition-all shadow-sm">
361
+ <div className="relative flex-shrink-0">
362
+ <button
363
+ type="button"
364
+ onClick={() => setShowPlusMenu(!showPlusMenu)}
365
+ className={`w-12 h-12 flex items-center justify-center rounded-full transition-all ${showPlusMenu ? 'bg-black text-white' : 'bg-[#FF6600] text-white hover:bg-black'}`}
366
+ >
367
+ <span className={`text-2xl transition-transform font-light ${showPlusMenu ? 'rotate-45' : ''}`}>+</span>
368
+ </button>
369
+ {showPlusMenu && (
370
+ <div className="absolute bottom-full left-0 mb-4 bg-white border border-gray-100 rounded-2xl shadow-xl p-2 min-w-[150px] animate-in fade-in slide-in-from-bottom-2">
371
+ <button
372
+ onClick={() => {
373
+ setStep(AppStep.PDF_UPLOAD);
374
+ setShowPlusMenu(false);
375
+ }}
376
+ className="w-full text-left px-4 py-3 text-sm font-semibold hover:bg-gray-50 rounded-xl transition-colors flex items-center gap-3 text-gray-700"
377
+ >
378
+ <IconFileUp className="w-4 h-4 text-[#FF6600]" />
379
+ Cambia PDF
380
+ </button>
381
+ </div>
382
+ )}
383
+ </div>
384
+ <input
385
+ type="text"
386
+ value={inputValue}
387
+ onChange={(e) => setInputValue(e.target.value)}
388
+ placeholder="Scrivi un messaggio..."
389
+ className="flex-grow bg-transparent border-none outline-none px-4 md:px-6 py-4 text-[14px] md:text-[15px] text-gray-800 placeholder:text-gray-300 font-verdana"
390
+ />
391
+ <button
392
+ type="submit"
393
+ disabled={!inputValue.trim() || isTyping}
394
+ className="text-[#FF6600] w-10 h-10 md:w-12 md:h-12 flex items-center justify-center flex-shrink-0 rounded-full hover:bg-[#FF6600]/10 disabled:opacity-10 transition-all"
395
+ >
396
+ <IconSend className="w-5 h-5 md:w-6 md:h-6" />
397
+ </button>
398
+ </div>
399
+ <p className="text-[6px] text-center text-gray-300 mt-4 tracking-[0.2em] uppercase font-bold px-4">
400
+ Qualsiasi sistema, per quanto sofisticato sia, può commettere errori e non può sostituirsi ad un attenta lettura del documento
401
+ </p>
402
+ </form>
403
+ </div>
404
+ </div>
405
+ )}
406
+
407
+ {/* HOW IT WORKS STEP */}
408
+ {step === AppStep.HOW_IT_WORKS && (
409
+ <div className="flex-grow overflow-y-auto px-6 py-10 chat-scroll animate-in fade-in duration-700">
410
+ <div className="max-w-4xl mx-auto space-y-12 pb-20">
411
+ <div className="text-center mb-16">
412
+ <h1 className="text-4xl md:text-5xl font-black mb-4 font-outfit">Architettura <span className="text-[#FF6600]">GraphRAG</span></h1>
413
+ <p className="text-gray-500 font-light text-lg">Un sistema di recupero deterministico potenziato da orchestratori LLM.</p>
414
+ </div>
415
+
416
+ <div className="grid md:grid-cols-2 gap-8">
417
+ <div className="bg-white border border-gray-100 p-8 rounded-3xl shadow-sm hover:orange-glow transition-all">
418
+ <div className="bg-[#FF6600] text-white w-12 h-12 rounded-xl flex items-center justify-center mb-6">
419
+ <IconFileUp className="w-6 h-6" />
420
+ </div>
421
+ <h3 className="text-xl font-bold mb-3">Ingestion & Struttura</h3>
422
+ <p className="text-gray-600 font-light text-sm leading-relaxed">
423
+ Il PDF non viene solo "letto", ma processato per estrarre la <strong>gerarchia logica</strong>. Utilizziamo librerie specializzate per ricostruire tabelle, titoli e sezioni, garantendo che l'informazione non perda mai il suo contesto originale durante la scomposizione.
424
+ </p>
425
+ </div>
426
+
427
+ <div className="bg-white border border-gray-100 p-8 rounded-3xl shadow-sm hover:orange-glow transition-all">
428
+ <div className="bg-black text-white w-12 h-12 rounded-xl flex items-center justify-center mb-6">
429
+ <IconLayers className="w-6 h-6" />
430
+ </div>
431
+ <h3 className="text-xl font-bold mb-3">Chunking</h3>
432
+ <p className="text-gray-600 font-light text-sm leading-relaxed">
433
+ Applichiamo una strategia di <strong>chunking ricorsivo</strong>. Il testo viene diviso in frammenti ottimizzati per gli embedding, mantenendo sovrapposizioni intelligenti tra i paragrafi per non interrompere la coerenza semantica tra i nodi del database.
434
+ </p>
435
+ </div>
436
+
437
+ <div className="bg-white border border-gray-100 p-8 rounded-3xl shadow-sm hover:orange-glow transition-all">
438
+ <div className="bg-black text-white w-12 h-12 rounded-xl flex items-center justify-center mb-6">
439
+ <IconDatabase className="w-6 h-6" />
440
+ </div>
441
+ <h3 className="text-xl font-bold mb-3">Knowledge Graph (Neo4j)</h3>
442
+ <p className="text-gray-600 font-light text-sm leading-relaxed">
443
+ I chunk e le entità vengono mappati in <strong>Neo4j</strong>. Ogni frammento diventa un nodo collegato non solo vettorialmente, ma anche per relazione logica (es. "appartiene alla sezione X"). Questo permette una navigazione dei dati multimodale: semantica e a grafo.
444
+ </p>
445
+ </div>
446
+
447
+ <div className="bg-white border border-gray-100 p-8 rounded-3xl shadow-sm hover:orange-glow transition-all">
448
+ <div className="bg-black text-white w-12 h-12 rounded-xl flex items-center justify-center mb-6">
449
+ <IconWorkflow className="w-6 h-6" />
450
+ </div>
451
+ <h3 className="text-xl font-bold mb-3">LangGraph</h3>
452
+ <p className="text-gray-500 font-light text-sm leading-relaxed">
453
+ L'intero flusso è gestito da <strong>LangGraph</strong>. Un sistema a stati finiti che coordina i nodi di analisi, recupero e validazione. Questo garantisce che ogni query segua un percorso logico rigoroso prima di produrre un output.
454
+ </p>
455
+ </div>
456
+ </div>
457
+
458
+ <div className="bg-gray-50 border border-gray-100 p-10 rounded-[3rem] shadow-inner">
459
+ <div className="text-center mb-10">
460
+ <h3 className="text-2xl font-black mb-2 font-outfit">Recupero <span className="text-[#FF6600]">Ibrido & Intelligente</span></h3>
461
+ <p className="text-gray-500 text-sm font-light max-w-2xl mx-auto">
462
+ l'LLM agisce esclusivamente come Orchestratore Decisionale: analizza l'intento della domanda per determinare la rotta di recupero ottimale. </p>
463
+ </div>
464
+
465
+ <div className="grid md:grid-cols-2 gap-8">
466
+ <div className="space-y-6">
467
+ <div className="bg-white p-6 rounded-2xl shadow-sm border border-black/5">
468
+ <div className="flex items-center gap-3 mb-3">
469
+ <div className="bg-[#FF6600]/10 px-2 py-1 rounded md font-bold text-[#FF6600] text-[10px] uppercase">Routing</div>
470
+ <h4 className="font-bold text-sm">Mistral come Router Strategico</h4>
471
+ </div>
472
+ <p className="text-xs text-gray-600 leading-relaxed font-light">
473
+ Utilizziamo <strong>Mistral</strong> per decidere la via di recupero. Se la domanda contiene entità specifiche già mappate nel DB, Mistral formula una <strong>Query Cypher</strong> basata sulle NE estratte.
474
+ </p>
475
+ </div>
476
+
477
+ <div className="bg-white p-6 rounded-2xl shadow-sm border border-black/5">
478
+ <div className="flex items-center gap-3 mb-3">
479
+ <div className="bg-black/5 px-2 py-1 rounded md font-bold text-gray-500 text-[10px] uppercase">Vector</div>
480
+ <h4 className="font-bold text-sm">Ricerca Vettoriale</h4>
481
+ </div>
482
+ <p className="text-xs text-gray-600 leading-relaxed font-light">
483
+ Il sistema attiva una ricerca vettoriale quando il router identifica quesiti di natura concettuale o descrittiva.
484
+ </p>
485
+ </div>
486
+ </div>
487
+
488
+ <div className="space-y-6">
489
+ <div className="bg-white p-6 rounded-2xl shadow-sm border border-black/5">
490
+ <div className="flex items-center gap-3 mb-3">
491
+ <div className="bg-[#FF6600]/10 px-2 py-1 rounded md font-bold text-[#FF6600] text-[10px] uppercase">Entities</div>
492
+ <h4 className="font-bold text-sm">Named Entity Extraction (GLiNER)</h4>
493
+ </div>
494
+ <p className="text-xs text-gray-600 leading-relaxed font-light">
495
+ Sfruttiamo GLiNER per identificare entità chiave (nomi, luoghi, termini tecnici) sia nel documento che nella domanda utente.
496
+ </p>
497
+ </div>
498
+
499
+ <div className="bg-white p-6 rounded-2xl shadow-sm border border-black/5">
500
+ <div className="flex items-center gap-3 mb-3">
501
+ <div className="bg-black/5 px-2 py-1 rounded md font-bold text-gray-500 text-[10px] uppercase">Rerank</div>
502
+ <h4 className="font-bold text-sm">Reranking-v2-m3</h4>
503
+ </div>
504
+ <p className="text-xs text-gray-600 leading-relaxed font-light">
505
+ I risultati grezzi del database vengono filtrati da un <strong>Cross-Encoder</strong>, nello specifico il modello BGE-Reranker-v2-m3.
506
+ </p>
507
+ </div>
508
+ </div>
509
+ </div>
510
+ </div>
511
+
512
+ <div className="bg-white border border-gray-100 p-8 rounded-[3rem] shadow-sm hover:orange-glow transition-all max-w-2xl mx-auto">
513
+ <div className="flex flex-col items-center text-center">
514
+ <div className="bg-[#FF6600] text-white w-14 h-14 rounded-2xl flex items-center justify-center mb-6 shadow-lg shadow-[#FF6600]/20">
515
+ <IconGlobe className="w-8 h-8" />
516
+ </div>
517
+ <h3 className="text-2xl font-black mb-3 font-outfit">Ricerca Globale</h3>
518
+ <p className="text-gray-600 font-light text-sm leading-relaxed">
519
+ Se la ricerca vettoriale sul documento attivo produce uno score di pertinenza basso, il sistema attiva automaticamente una <strong>Ricerca globale</strong>.
520
+ </p>
521
+ </div>
522
+ </div>
523
+
524
+ <div className="flex flex-col items-center text-center max-w-2xl mx-auto space-y-6">
525
+ <div className="bg-[#FF6600] text-white w-16 h-16 rounded-2xl flex items-center justify-center shadow-lg shadow-[#FF6600]/20">
526
+ <IconBot className="w-10 h-10" />
527
+ </div>
528
+ <h3 className="text-2xl font-black font-outfit">Sintesi</h3>
529
+ <p className="text-gray-600 font-light leading-relaxed">
530
+ L'LLM finale non genera testo libero ma agisce esclusivamente da sintetizzatore: riceve i dati filtrati e produce una risposta ancorata a una fonte certa.
531
+ </p>
532
+ </div>
533
+
534
+ <div className="flex justify-center pt-10">
535
+ <button
536
+ onClick={returnFromHowItWorks}
537
+ className="bg-black text-white px-12 py-5 rounded-2xl font-bold hover:bg-[#FF6600] transition-all shadow-xl active:scale-95"
538
+ >
539
+ Torna alla Chat
540
+ </button>
541
+ </div>
542
+ </div>
543
+ </div>
544
+ )}
545
+ </div>
546
+ </main>
547
+
548
+ {/* Developer Footer */}
549
+ {isInitialPage && (
550
+ <div className="w-full mt-20 py-12 px-6 border-t border-gray-100 flex flex-col items-center gap-6 animate-in fade-in slide-in-from-bottom-4 duration-1000 bg-white">
551
+ <div className="flex items-center gap-8">
552
+ <a href="https://github.com/ValerioBotto" target="_blank" rel="noopener noreferrer" className="group flex flex-col items-center gap-2">
553
+ <div className="p-3 bg-gray-50 rounded-xl group-hover:bg-black group-hover:text-white transition-all">
554
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M15 22v-4a4.8 4.8 0 0 0-1-3.5c3 0 6-2 6-5.5.08-1.25-.27-2.48-1-3.5.28-1.15.28-2.35 0-3.5 0 0-1 0-3 1.5-2.64-.5-5.36-.5-8 0C6 2 5 2 5 2c-.3 1.15-.3 2.35 0 3.5A5.403 5.403 0 0 0 4 9c0 3.5 3 5.5 6 5.5-.39.49-.68 1.05-.85 1.65-.17.6-.22 1.23-.15 1.85v4"></path><path d="M9 18c-4.51 2-5-2-7-2"></path></svg>
555
+ </div>
556
+ <span className="text-[10px] font-bold uppercase tracking-widest text-gray-400 group-hover:text-black">GitHub</span>
557
+ </a>
558
+ <a href="https://www.linkedin.com/in/valerio-botto-4844b2190/" target="_blank" rel="noopener noreferrer" className="group flex flex-col items-center gap-2">
559
+ <div className="p-3 bg-gray-50 rounded-xl group-hover:bg-[#0077b5] group-hover:text-white transition-all">
560
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M16 8a6 6 0 0 1 6 6v7h-4v-7a2 2 0 0 0-2-2 2 2 0 0 0-2 2v7h-4v-7a6 6 0 0 1 6-6z"></path><rect width="4" height="12" x="2" y="9"></rect><circle cx="4" cy="4" r="2"></circle></svg>
561
+ </div>
562
+ <span className="text-[10px] font-bold uppercase tracking-widest text-gray-400 group-hover:text-[#0077b5]">LinkedIn</span>
563
+ </a>
564
+ </div>
565
+ <div className="text-center">
566
+ <p className="text-[10px] text-gray-300 font-black tracking-[0.4em] uppercase">
567
+ Developed by <span className="text-black">Valerio Botto</span>
568
+ </p>
569
+ </div>
570
+ </div>
571
+ )}
572
+
573
+ {(step !== AppStep.CHAT && step !== AppStep.HOW_IT_WORKS && !isInitialPage) && (
574
+ <footer className="py-10 text-center border-t border-gray-50 bg-white">
575
+ <p className="text-[10px] text-gray-300 font-black tracking-[0.4em] uppercase">
576
+ &copy; 2024 RAG CHATBOT
577
+ </p>
578
+ </footer>
579
+ )}
580
+ </div>
581
+ );
582
+ };
583
+
584
+ export default App;
frontend/src/assets/react.svg ADDED
frontend/src/components/Icons.tsx ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+
3
/**
 * "User" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconUser({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <path d="M19 21v-2a4 4 0 0 0-4-4H9a4 4 0 0 0-4 4v2" />
      <circle cx="12" cy="7" r="4" />
    </svg>
  );
}
6
+
7
/**
 * "File upload" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconFileUp({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z" />
      <polyline points="14 2 14 8 20 8" />
      <line x1="12" y1="18" x2="12" y2="12" />
      <polyline points="9 15 12 12 15 15" />
    </svg>
  );
}
10
+
11
/**
 * "Send" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconSend({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <line x1="22" y1="2" x2="11" y2="13" />
      <polygon points="22 2 15 22 11 13 2 9 22 2" />
    </svg>
  );
}
14
+
15
/**
 * Robot mascot drawn on a 100x100 viewBox.
 * Unlike the other icons it uses fixed colours (black strokes, white fills,
 * orange blush) rather than `currentColor`; `className` is forwarded to <svg>.
 */
export function IconBot({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 100 100"
      fill="none"
      className={className}
    >
      {/* Antenna: short stem plus a filled tip near the right edge */}
      <path d="M78 30 L78 18" stroke="black" strokeWidth="4.5" strokeLinecap="round" />
      <circle cx="78" cy="14" r="5.5" fill="black" />

      {/* Ears: rounded rectangles on both sides, drawn before the head so it overlaps them */}
      <rect x="10" y="42" width="12" height="28" rx="6" fill="white" stroke="black" strokeWidth="4.5" />
      <rect x="78" y="42" width="12" height="28" rx="6" fill="white" stroke="black" strokeWidth="4.5" />

      {/* Head */}
      <rect x="18" y="26" width="64" height="58" rx="16" fill="white" stroke="black" strokeWidth="4.5" />

      {/* Eyes: two filled vertical ovals */}
      <ellipse cx="36" cy="52" rx="4.5" ry="8" fill="black" />
      <ellipse cx="64" cy="52" rx="4.5" ry="8" fill="black" />

      {/* Blush: orange circles */}
      <circle cx="30" cy="62" r="6" fill="#FF8C42" />
      <circle cx="70" cy="62" r="6" fill="#FF8C42" />

      {/* Smile: quadratic curve */}
      <path d="M44 62 Q50 69 56 62" stroke="black" strokeWidth="3.5" strokeLinecap="round" fill="none" />
    </svg>
  );
}
40
+
41
/**
 * "Gear / settings" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export const IconGear = ({ className }: { className?: string }) => (
  <svg
    xmlns="http://www.w3.org/2000/svg"
    width="24"
    height="24"
    viewBox="0 0 24 24"
    fill="none"
    stroke="currentColor"
    strokeWidth="2"
    strokeLinecap="round"
    strokeLinejoin="round"
    className={className}
  >
    {/*
      The bottom-right tooth segment is `l.22-.39` (right and up). It was previously
      `l.22.39`, which moved down and broke the symmetry with the other three teeth
      (`l-.22.38`, `l.22.38`, `l-.22-.38`), distorting the outline.
    */}
    <path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z" />
    <circle cx="12" cy="12" r="3" />
  </svg>
);
44
+
45
/**
 * "Sparkles" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconSparkles({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <path d="m12 3-1.912 5.813a2 2 0 0 1-1.275 1.275L3 12l5.813 1.912a2 2 0 0 1 1.275 1.275L12 21l1.912-5.813a2 2 0 0 1 1.275-1.275L21 12l-5.813-1.912a2 2 0 0 1-1.275-1.275L12 3Z" />
      <path d="M5 3v4" />
      <path d="M19 17v4" />
      <path d="M3 5h4" />
      <path d="M17 19h4" />
    </svg>
  );
}
48
+
49
/**
 * "Arrow right" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconArrowRight({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <path d="M5 12h14" />
      <path d="m12 5 7 7-7 7" />
    </svg>
  );
}
52
+
53
/**
 * "Layers" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconLayers({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <path d="m12.83 2.18a2 2 0 0 0-1.66 0L2.6 6.08a1 1 0 0 0 0 1.83l8.58 3.91a2 2 0 0 0 1.66 0l8.58-3.91a1 1 0 0 0 0-1.83Z" />
      <path d="m2.6 12.08 8.58 3.91a2 2 0 0 0 1.66 0l8.58-3.91" />
      <path d="m2.6 17.08 8.58 3.91a2 2 0 0 0 1.66 0l8.58-3.91" />
    </svg>
  );
}
56
+
57
/**
 * "Database" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconDatabase({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <ellipse cx="12" cy="5" rx="9" ry="3" />
      <path d="M3 5V19A9 3 0 0 0 21 19V5" />
      <path d="M3 12A9 3 0 0 0 21 12" />
    </svg>
  );
}
60
+
61
/**
 * "Workflow" glyph (two nodes joined by two elbow connectors) as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconWorkflow({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <circle cx="7" cy="7" r="3" />
      <circle cx="17" cy="17" r="3" />
      <path d="M10 7h4a3 3 0 0 1 3 3v4" />
      <path d="M14 17h-4a3 3 0 0 1-3-3v-4" />
    </svg>
  );
}
69
+
70
/**
 * "Globe" glyph rendered as an inline 24x24 SVG.
 * The stroke follows `currentColor`; `className` is forwarded to the <svg> element.
 */
export function IconGlobe({ className }: { className?: string }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <circle cx="12" cy="12" r="10" />
      <line x1="2" y1="12" x2="22" y2="12" />
      <path d="M12 2a15.3 15.3 0 0 1 4 10 15.3 15.3 0 0 1-4 10 15.3 15.3 0 0 1-4-10 15.3 15.3 0 0 1 4-10z" />
    </svg>
  );
}
frontend/src/index.css ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import "tailwindcss";
2
+
3
/* Base page defaults: full-height white page, black text, Inter typeface. */
@layer base {
  body {
    min-height: 100vh;
    /* 100% instead of 100vw: 100vw includes the vertical scrollbar gutter and
       makes the page overflow horizontally whenever a scrollbar is shown. */
    width: 100%;
    background-color: white;
    color: black;
    font-family: 'Inter', sans-serif;
  }
}
12
+
13
/* Font helper classes */
.font-outfit {
  font-family: 'Outfit', sans-serif;
}

.font-verdana {
  font-family: 'Verdana', sans-serif;
}

/* Translucent, blurred header bar with a hairline bottom border */
.glass-header {
  background: rgba(255, 255, 255, 0.8);
  backdrop-filter: blur(10px);
  border-bottom: 1px solid rgba(0, 0, 0, 0.05);
}

/* Orange halo; larger and more opaque while hovered */
.orange-glow {
  box-shadow: 0 0 20px rgba(255, 102, 0, 0.2);
}

.orange-glow:hover {
  box-shadow: 0 0 30px rgba(255, 102, 0, 0.4);
}

/* Continuous full rotation, one turn every 8 seconds */
@keyframes rotate-gear {
  from {
    transform: rotate(0deg);
  }

  to {
    transform: rotate(360deg);
  }
}

.animate-gear-rotate {
  animation: rotate-gear 8s linear infinite;
}

/* Slim, faint scrollbar for the chat pane (WebKit-style pseudo-elements only) */
.chat-scroll::-webkit-scrollbar {
  width: 4px;
}

.chat-scroll::-webkit-scrollbar-thumb {
  background: rgba(0, 0, 0, 0.05);
  border-radius: 10px;
}

/* Rendered markdown: bold text in brand orange, paragraph spacing
   with no trailing gap after the last paragraph */
.markdown-container strong {
  font-weight: bold;
  color: #FF6600;
}

.markdown-container p {
  margin-bottom: 0.5rem;
}

.markdown-container p:last-child {
  margin-bottom: 0;
}
frontend/src/main.tsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import { StrictMode } from 'react'
2
+ import { createRoot } from 'react-dom/client'
3
+ import './index.css'
4
+ import App from './App.tsx'
5
+
6
// Resolve the mount point explicitly so that a missing #root element fails with
// a clear message instead of relying on a non-null assertion.
const rootElement = document.getElementById('root')
if (rootElement === null) {
  throw new Error('Unable to mount the app: element with id "root" was not found')
}

// Mount the application inside React's StrictMode wrapper.
createRoot(rootElement).render(
  <StrictMode>
    <App />
  </StrictMode>,
)
frontend/src/types.ts ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
/**
 * Runtime lookup table of the application steps.
 * Each key maps to the identical string literal, so `AppStep.CHAT === 'CHAT'`.
 */
export const AppStep = {
  NAME_INPUT: 'NAME_INPUT',
  PDF_UPLOAD: 'PDF_UPLOAD',
  PROCESSING: 'PROCESSING',
  CHAT: 'CHAT',
  HOW_IT_WORKS: 'HOW_IT_WORKS'
} as const;

/**
 * Union of the step identifiers, derived from the `AppStep` object so the
 * type and the runtime values cannot drift apart.
 */
export type AppStep = (typeof AppStep)[keyof typeof AppStep];

/** A single chat entry. */
export interface Message {
  /** Author of the entry: the human user or the AI. */
  role: 'user' | 'ai';
  /** Text of the message. */
  content: string;
  /** Timestamp attached to the message. */
  timestamp: Date;
}

/** Data describing the current user. */
export interface UserData {
  /** Name of the user. */
  name: string;
  /** Optional file name associated with the user. */
  fileName?: string;
}
frontend/tailwind.config.js ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Tailwind CSS configuration for the frontend (content globs, theme extensions, plugins).
2
+ /** @type {import('tailwindcss').Config} */
3
+ module.exports = {
4
+ content: [
5
+ "./src/**/*.{js,ts,jsx,tsx}",
6
+ "./public/index.html",
7
+ ],
8
+ theme: {
9
+ extend: {},
10
+ },
11
+ plugins: [],
12
+ }
frontend/tsconfig.app.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
4
+ "target": "ES2022",
5
+ "useDefineForClassFields": true,
6
+ "lib": ["ES2022", "DOM", "DOM.Iterable"],
7
+ "module": "ESNext",
8
+ "types": ["vite/client"],
9
+ "skipLibCheck": true,
10
+
11
+ /* Bundler mode */
12
+ "moduleResolution": "bundler",
13
+ "allowImportingTsExtensions": true,
14
+ "verbatimModuleSyntax": true,
15
+ "moduleDetection": "force",
16
+ "noEmit": true,
17
+ "jsx": "react-jsx",
18
+
19
+ /* Linting */
20
+ "strict": true,
21
+ "noUnusedLocals": true,
22
+ "noUnusedParameters": true,
23
+ "erasableSyntaxOnly": true,
24
+ "noFallthroughCasesInSwitch": true,
25
+ "noUncheckedSideEffectImports": true
26
+ },
27
+ "include": ["src"]
28
+ }
frontend/tsconfig.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [],
3
+ "references": [
4
+ { "path": "./tsconfig.app.json" },
5
+ { "path": "./tsconfig.node.json" }
6
+ ]
7
+ }
frontend/tsconfig.node.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
4
+ "target": "ES2023",
5
+ "lib": ["ES2023"],
6
+ "module": "ESNext",
7
+ "types": ["node"],
8
+ "skipLibCheck": true,
9
+
10
+ /* Bundler mode */
11
+ "moduleResolution": "bundler",
12
+ "allowImportingTsExtensions": true,
13
+ "verbatimModuleSyntax": true,
14
+ "moduleDetection": "force",
15
+ "noEmit": true,
16
+
17
+ /* Linting */
18
+ "strict": true,
19
+ "noUnusedLocals": true,
20
+ "noUnusedParameters": true,
21
+ "erasableSyntaxOnly": true,
22
+ "noFallthroughCasesInSwitch": true,
23
+ "noUncheckedSideEffectImports": true
24
+ },
25
+ "include": ["vite.config.ts"]
26
+ }
frontend/vite.config.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import { defineConfig } from 'vite'
2
+ import react from '@vitejs/plugin-react'
3
+
4
// Vite configuration; the only customisation is the React plugin.
// Reference: https://vite.dev/config/
const config = defineConfig({
  plugins: [react()],
})

export default config
main.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import logging
4
+ from processingPdf.loader import get_layout_extractor
5
+ from processingPdf.logicSections import extract_logical_sections
6
+ from processingPdf.chunker import split_sections_into_chunks
7
+ from processingPdf.indexer import Indexer
8
+ from db.graph_db import GraphDB
9
+ from dotenv import load_dotenv
10
+
11
+ # Importazione del workflow LangGraph
12
+ from agentLogic.graph import app as rag_app
13
+
14
# Logging setup: INFO level on the root logger plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

load_dotenv()

st.set_page_config(page_title="RAG Chatbot - PDF", layout="wide")

# Create each session-scoped component only when it is not already present in
# session_state; entries are created in the order listed here.
_SESSION_DEFAULTS = {
    "indexer": Indexer,
    "layout_extractor": get_layout_extractor,
    "chat_history": list,
    "pdf_ready": lambda: False,
}
for _state_key, _make_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

st.title("RAG Chatbot")
st.markdown("Carica un PDF per indicizzarlo su Neo4j e iniziare a chattare.")
34
+
35
# --- SIDEBAR: USER PROFILE, PDF UPLOAD AND INDEXING ---
with st.sidebar:
    st.header("Profilo Utente")

    user_name_input = st.text_input("Ciao! Inserisci il tuo nome/nickname", placeholder="es. Laura")
    # Normalise the nickname into an identifier: trimmed, lower-cased,
    # spaces replaced by underscores.
    user_id_clean = user_name_input.strip().lower().replace(" ", "_")

    if not user_id_clean:
        st.warning("Inserisci un nome utente per procedere.")
    else:
        st.success(f"Loggato come: {user_id_clean}")

    st.divider()
    st.header("Caricamento PDF")
    uploaded_file = st.file_uploader("Scegli un file PDF", type="pdf")

    if uploaded_file is not None and user_id_clean:
        if st.button("Indicizza Documento"):
            with st.spinner("Elaborazione in corso..."):
                try:
                    file_bytes = uploaded_file.getvalue()
                    filename = uploaded_file.name

                    # Pipeline: layout extraction -> logical sections -> chunks -> index.
                    doc = st.session_state.layout_extractor(file_bytes)
                    sections = extract_logical_sections(doc)
                    chunks = split_sections_into_chunks(sections, filename)

                    st.session_state.indexer.index_chunks_to_neo4j(filename, chunks, user_id_clean)

                    st.success(f"Ottimo {user_id_clean}! {len(chunks)} chunk indicizzati con successo.")
                    st.session_state.pdf_ready = True
                    st.session_state.current_user = user_id_clean
                    # Remember which file was actually indexed: the uploader widget
                    # can later be cleared or point at a different, unindexed file.
                    st.session_state.current_filename = filename
                except Exception as e:
                    # UI boundary: report the failure to the user and log the traceback.
                    st.error(f"Errore durante l'indicizzazione: {e}")
                    logger.exception("Errore: %s", e)

# --- CHAT AREA ---
st.divider()

# Replay the conversation stored in session_state.
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User input
if prompt := st.chat_input("Fai una domanda sul documento..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        response_placeholder = st.empty()

        # Prefer the filename recorded at indexing time. Fall back to the uploader
        # only for sessions indexed before that value was being stored.
        indexed_filename = st.session_state.get("current_filename")
        if indexed_filename is None and uploaded_file is not None:
            indexed_filename = uploaded_file.name

        if not st.session_state.pdf_ready or indexed_filename is None:
            full_response = "Per favore, carica e indicizza un PDF prima di iniziare la chat."
            response_placeholder.markdown(full_response)
        else:
            with st.spinner("Ricerca e generazione risposta in corso..."):
                try:
                    # Initial state handed to the LangGraph workflow.
                    initial_state = {
                        "query": prompt,
                        "user_id": st.session_state.current_user,
                        "filename": indexed_filename,
                        "intent_data": {},
                        "context_chunks": [],
                        "final_answer": ""
                    }

                    # invoke() returns the final state once the workflow has finished.
                    result = rag_app.invoke(initial_state)

                    full_response = result.get("final_answer", "Non sono riuscito a generare una risposta.")
                    response_placeholder.markdown(full_response)

                except Exception as e:
                    # UI boundary: show the error in the chat and log the traceback.
                    full_response = f"Errore durante l'elaborazione della domanda: {e}"
                    response_placeholder.error(full_response)
                    logger.exception("Errore Chat: %s", e)

    st.session_state.chat_history.append({"role": "assistant", "content": full_response})
package-lock.json ADDED
@@ -0,0 +1,1242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "RAG",
3
+ "lockfileVersion": 3,
4
+ "requires": true,
5
+ "packages": {
6
+ "": {
7
+ "dependencies": {
8
+ "react-markdown": "^10.1.0"
9
+ }
10
+ },
11
+ "node_modules/@types/debug": {
12
+ "version": "4.1.12",
13
+ "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz",
14
+ "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==",
15
+ "license": "MIT",
16
+ "dependencies": {
17
+ "@types/ms": "*"
18
+ }
19
+ },
20
+ "node_modules/@types/estree": {
21
+ "version": "1.0.8",
22
+ "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
23
+ "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
24
+ "license": "MIT"
25
+ },
26
+ "node_modules/@types/estree-jsx": {
27
+ "version": "1.0.5",
28
+ "resolved": "https://registry.npmjs.org/@types/estree-jsx/-/estree-jsx-1.0.5.tgz",
29
+ "integrity": "sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==",
30
+ "license": "MIT",
31
+ "dependencies": {
32
+ "@types/estree": "*"
33
+ }
34
+ },
35
+ "node_modules/@types/hast": {
36
+ "version": "3.0.4",
37
+ "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
38
+ "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
39
+ "license": "MIT",
40
+ "dependencies": {
41
+ "@types/unist": "*"
42
+ }
43
+ },
44
+ "node_modules/@types/mdast": {
45
+ "version": "4.0.4",
46
+ "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz",
47
+ "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==",
48
+ "license": "MIT",
49
+ "dependencies": {
50
+ "@types/unist": "*"
51
+ }
52
+ },
53
+ "node_modules/@types/ms": {
54
+ "version": "2.1.0",
55
+ "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz",
56
+ "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==",
57
+ "license": "MIT"
58
+ },
59
+ "node_modules/@types/react": {
60
+ "version": "19.2.8",
61
+ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.8.tgz",
62
+ "integrity": "sha512-3MbSL37jEchWZz2p2mjntRZtPt837ij10ApxKfgmXCTuHWagYg7iA5bqPw6C8BMPfwidlvfPI/fxOc42HLhcyg==",
63
+ "license": "MIT",
64
+ "peer": true,
65
+ "dependencies": {
66
+ "csstype": "^3.2.2"
67
+ }
68
+ },
69
+ "node_modules/@types/unist": {
70
+ "version": "3.0.3",
71
+ "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
72
+ "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==",
73
+ "license": "MIT"
74
+ },
75
+ "node_modules/@ungap/structured-clone": {
76
+ "version": "1.3.0",
77
+ "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.3.0.tgz",
78
+ "integrity": "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==",
79
+ "license": "ISC"
80
+ },
81
+ "node_modules/bail": {
82
+ "version": "2.0.2",
83
+ "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz",
84
+ "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==",
85
+ "license": "MIT",
86
+ "funding": {
87
+ "type": "github",
88
+ "url": "https://github.com/sponsors/wooorm"
89
+ }
90
+ },
91
+ "node_modules/ccount": {
92
+ "version": "2.0.1",
93
+ "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz",
94
+ "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==",
95
+ "license": "MIT",
96
+ "funding": {
97
+ "type": "github",
98
+ "url": "https://github.com/sponsors/wooorm"
99
+ }
100
+ },
101
+ "node_modules/character-entities": {
102
+ "version": "2.0.2",
103
+ "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz",
104
+ "integrity": "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==",
105
+ "license": "MIT",
106
+ "funding": {
107
+ "type": "github",
108
+ "url": "https://github.com/sponsors/wooorm"
109
+ }
110
+ },
111
+ "node_modules/character-entities-html4": {
112
+ "version": "2.1.0",
113
+ "resolved": "https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz",
114
+ "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==",
115
+ "license": "MIT",
116
+ "funding": {
117
+ "type": "github",
118
+ "url": "https://github.com/sponsors/wooorm"
119
+ }
120
+ },
121
+ "node_modules/character-entities-legacy": {
122
+ "version": "3.0.0",
123
+ "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz",
124
+ "integrity": "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==",
125
+ "license": "MIT",
126
+ "funding": {
127
+ "type": "github",
128
+ "url": "https://github.com/sponsors/wooorm"
129
+ }
130
+ },
131
+ "node_modules/character-reference-invalid": {
132
+ "version": "2.0.1",
133
+ "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz",
134
+ "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==",
135
+ "license": "MIT",
136
+ "funding": {
137
+ "type": "github",
138
+ "url": "https://github.com/sponsors/wooorm"
139
+ }
140
+ },
141
+ "node_modules/comma-separated-tokens": {
142
+ "version": "2.0.3",
143
+ "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz",
144
+ "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==",
145
+ "license": "MIT",
146
+ "funding": {
147
+ "type": "github",
148
+ "url": "https://github.com/sponsors/wooorm"
149
+ }
150
+ },
151
+ "node_modules/csstype": {
152
+ "version": "3.2.3",
153
+ "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
154
+ "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==",
155
+ "license": "MIT",
156
+ "peer": true
157
+ },
158
+ "node_modules/debug": {
159
+ "version": "4.4.3",
160
+ "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
161
+ "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
162
+ "license": "MIT",
163
+ "dependencies": {
164
+ "ms": "^2.1.3"
165
+ },
166
+ "engines": {
167
+ "node": ">=6.0"
168
+ },
169
+ "peerDependenciesMeta": {
170
+ "supports-color": {
171
+ "optional": true
172
+ }
173
+ }
174
+ },
175
+ "node_modules/decode-named-character-reference": {
176
+ "version": "1.2.0",
177
+ "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.2.0.tgz",
178
+ "integrity": "sha512-c6fcElNV6ShtZXmsgNgFFV5tVX2PaV4g+MOAkb8eXHvn6sryJBrZa9r0zV6+dtTyoCKxtDy5tyQ5ZwQuidtd+Q==",
179
+ "license": "MIT",
180
+ "dependencies": {
181
+ "character-entities": "^2.0.0"
182
+ },
183
+ "funding": {
184
+ "type": "github",
185
+ "url": "https://github.com/sponsors/wooorm"
186
+ }
187
+ },
188
+ "node_modules/dequal": {
189
+ "version": "2.0.3",
190
+ "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
191
+ "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==",
192
+ "license": "MIT",
193
+ "engines": {
194
+ "node": ">=6"
195
+ }
196
+ },
197
+ "node_modules/devlop": {
198
+ "version": "1.1.0",
199
+ "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
200
+ "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==",
201
+ "license": "MIT",
202
+ "dependencies": {
203
+ "dequal": "^2.0.0"
204
+ },
205
+ "funding": {
206
+ "type": "github",
207
+ "url": "https://github.com/sponsors/wooorm"
208
+ }
209
+ },
210
+ "node_modules/estree-util-is-identifier-name": {
211
+ "version": "3.0.0",
212
+ "resolved": "https://registry.npmjs.org/estree-util-is-identifier-name/-/estree-util-is-identifier-name-3.0.0.tgz",
213
+ "integrity": "sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==",
214
+ "license": "MIT",
215
+ "funding": {
216
+ "type": "opencollective",
217
+ "url": "https://opencollective.com/unified"
218
+ }
219
+ },
220
+ "node_modules/extend": {
221
+ "version": "3.0.2",
222
+ "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
223
+ "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==",
224
+ "license": "MIT"
225
+ },
226
+ "node_modules/hast-util-to-jsx-runtime": {
227
+ "version": "2.3.6",
228
+ "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz",
229
+ "integrity": "sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg==",
230
+ "license": "MIT",
231
+ "dependencies": {
232
+ "@types/estree": "^1.0.0",
233
+ "@types/hast": "^3.0.0",
234
+ "@types/unist": "^3.0.0",
235
+ "comma-separated-tokens": "^2.0.0",
236
+ "devlop": "^1.0.0",
237
+ "estree-util-is-identifier-name": "^3.0.0",
238
+ "hast-util-whitespace": "^3.0.0",
239
+ "mdast-util-mdx-expression": "^2.0.0",
240
+ "mdast-util-mdx-jsx": "^3.0.0",
241
+ "mdast-util-mdxjs-esm": "^2.0.0",
242
+ "property-information": "^7.0.0",
243
+ "space-separated-tokens": "^2.0.0",
244
+ "style-to-js": "^1.0.0",
245
+ "unist-util-position": "^5.0.0",
246
+ "vfile-message": "^4.0.0"
247
+ },
248
+ "funding": {
249
+ "type": "opencollective",
250
+ "url": "https://opencollective.com/unified"
251
+ }
252
+ },
253
+ "node_modules/hast-util-whitespace": {
254
+ "version": "3.0.0",
255
+ "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz",
256
+ "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==",
257
+ "license": "MIT",
258
+ "dependencies": {
259
+ "@types/hast": "^3.0.0"
260
+ },
261
+ "funding": {
262
+ "type": "opencollective",
263
+ "url": "https://opencollective.com/unified"
264
+ }
265
+ },
266
+ "node_modules/html-url-attributes": {
267
+ "version": "3.0.1",
268
+ "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz",
269
+ "integrity": "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==",
270
+ "license": "MIT",
271
+ "funding": {
272
+ "type": "opencollective",
273
+ "url": "https://opencollective.com/unified"
274
+ }
275
+ },
276
+ "node_modules/inline-style-parser": {
277
+ "version": "0.2.7",
278
+ "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.7.tgz",
279
+ "integrity": "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==",
280
+ "license": "MIT"
281
+ },
282
+ "node_modules/is-alphabetical": {
283
+ "version": "2.0.1",
284
+ "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz",
285
+ "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==",
286
+ "license": "MIT",
287
+ "funding": {
288
+ "type": "github",
289
+ "url": "https://github.com/sponsors/wooorm"
290
+ }
291
+ },
292
+ "node_modules/is-alphanumerical": {
293
+ "version": "2.0.1",
294
+ "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-2.0.1.tgz",
295
+ "integrity": "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==",
296
+ "license": "MIT",
297
+ "dependencies": {
298
+ "is-alphabetical": "^2.0.0",
299
+ "is-decimal": "^2.0.0"
300
+ },
301
+ "funding": {
302
+ "type": "github",
303
+ "url": "https://github.com/sponsors/wooorm"
304
+ }
305
+ },
306
+ "node_modules/is-decimal": {
307
+ "version": "2.0.1",
308
+ "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz",
309
+ "integrity": "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==",
310
+ "license": "MIT",
311
+ "funding": {
312
+ "type": "github",
313
+ "url": "https://github.com/sponsors/wooorm"
314
+ }
315
+ },
316
+ "node_modules/is-hexadecimal": {
317
+ "version": "2.0.1",
318
+ "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz",
319
+ "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==",
320
+ "license": "MIT",
321
+ "funding": {
322
+ "type": "github",
323
+ "url": "https://github.com/sponsors/wooorm"
324
+ }
325
+ },
326
+ "node_modules/is-plain-obj": {
327
+ "version": "4.1.0",
328
+ "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz",
329
+ "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==",
330
+ "license": "MIT",
331
+ "engines": {
332
+ "node": ">=12"
333
+ },
334
+ "funding": {
335
+ "url": "https://github.com/sponsors/sindresorhus"
336
+ }
337
+ },
338
+ "node_modules/longest-streak": {
339
+ "version": "3.1.0",
340
+ "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz",
341
+ "integrity": "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==",
342
+ "license": "MIT",
343
+ "funding": {
344
+ "type": "github",
345
+ "url": "https://github.com/sponsors/wooorm"
346
+ }
347
+ },
348
+ "node_modules/mdast-util-from-markdown": {
349
+ "version": "2.0.2",
350
+ "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.2.tgz",
351
+ "integrity": "sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==",
352
+ "license": "MIT",
353
+ "dependencies": {
354
+ "@types/mdast": "^4.0.0",
355
+ "@types/unist": "^3.0.0",
356
+ "decode-named-character-reference": "^1.0.0",
357
+ "devlop": "^1.0.0",
358
+ "mdast-util-to-string": "^4.0.0",
359
+ "micromark": "^4.0.0",
360
+ "micromark-util-decode-numeric-character-reference": "^2.0.0",
361
+ "micromark-util-decode-string": "^2.0.0",
362
+ "micromark-util-normalize-identifier": "^2.0.0",
363
+ "micromark-util-symbol": "^2.0.0",
364
+ "micromark-util-types": "^2.0.0",
365
+ "unist-util-stringify-position": "^4.0.0"
366
+ },
367
+ "funding": {
368
+ "type": "opencollective",
369
+ "url": "https://opencollective.com/unified"
370
+ }
371
+ },
372
+ "node_modules/mdast-util-mdx-expression": {
373
+ "version": "2.0.1",
374
+ "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.1.tgz",
375
+ "integrity": "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==",
376
+ "license": "MIT",
377
+ "dependencies": {
378
+ "@types/estree-jsx": "^1.0.0",
379
+ "@types/hast": "^3.0.0",
380
+ "@types/mdast": "^4.0.0",
381
+ "devlop": "^1.0.0",
382
+ "mdast-util-from-markdown": "^2.0.0",
383
+ "mdast-util-to-markdown": "^2.0.0"
384
+ },
385
+ "funding": {
386
+ "type": "opencollective",
387
+ "url": "https://opencollective.com/unified"
388
+ }
389
+ },
390
+ "node_modules/mdast-util-mdx-jsx": {
391
+ "version": "3.2.0",
392
+ "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz",
393
+ "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==",
394
+ "license": "MIT",
395
+ "dependencies": {
396
+ "@types/estree-jsx": "^1.0.0",
397
+ "@types/hast": "^3.0.0",
398
+ "@types/mdast": "^4.0.0",
399
+ "@types/unist": "^3.0.0",
400
+ "ccount": "^2.0.0",
401
+ "devlop": "^1.1.0",
402
+ "mdast-util-from-markdown": "^2.0.0",
403
+ "mdast-util-to-markdown": "^2.0.0",
404
+ "parse-entities": "^4.0.0",
405
+ "stringify-entities": "^4.0.0",
406
+ "unist-util-stringify-position": "^4.0.0",
407
+ "vfile-message": "^4.0.0"
408
+ },
409
+ "funding": {
410
+ "type": "opencollective",
411
+ "url": "https://opencollective.com/unified"
412
+ }
413
+ },
414
+ "node_modules/mdast-util-mdxjs-esm": {
415
+ "version": "2.0.1",
416
+ "resolved": "https://registry.npmjs.org/mdast-util-mdxjs-esm/-/mdast-util-mdxjs-esm-2.0.1.tgz",
417
+ "integrity": "sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==",
418
+ "license": "MIT",
419
+ "dependencies": {
420
+ "@types/estree-jsx": "^1.0.0",
421
+ "@types/hast": "^3.0.0",
422
+ "@types/mdast": "^4.0.0",
423
+ "devlop": "^1.0.0",
424
+ "mdast-util-from-markdown": "^2.0.0",
425
+ "mdast-util-to-markdown": "^2.0.0"
426
+ },
427
+ "funding": {
428
+ "type": "opencollective",
429
+ "url": "https://opencollective.com/unified"
430
+ }
431
+ },
432
+ "node_modules/mdast-util-phrasing": {
433
+ "version": "4.1.0",
434
+ "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz",
435
+ "integrity": "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==",
436
+ "license": "MIT",
437
+ "dependencies": {
438
+ "@types/mdast": "^4.0.0",
439
+ "unist-util-is": "^6.0.0"
440
+ },
441
+ "funding": {
442
+ "type": "opencollective",
443
+ "url": "https://opencollective.com/unified"
444
+ }
445
+ },
446
+ "node_modules/mdast-util-to-hast": {
447
+ "version": "13.2.1",
448
+ "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.1.tgz",
449
+ "integrity": "sha512-cctsq2wp5vTsLIcaymblUriiTcZd0CwWtCbLvrOzYCDZoWyMNV8sZ7krj09FSnsiJi3WVsHLM4k6Dq/yaPyCXA==",
450
+ "license": "MIT",
451
+ "dependencies": {
452
+ "@types/hast": "^3.0.0",
453
+ "@types/mdast": "^4.0.0",
454
+ "@ungap/structured-clone": "^1.0.0",
455
+ "devlop": "^1.0.0",
456
+ "micromark-util-sanitize-uri": "^2.0.0",
457
+ "trim-lines": "^3.0.0",
458
+ "unist-util-position": "^5.0.0",
459
+ "unist-util-visit": "^5.0.0",
460
+ "vfile": "^6.0.0"
461
+ },
462
+ "funding": {
463
+ "type": "opencollective",
464
+ "url": "https://opencollective.com/unified"
465
+ }
466
+ },
467
+ "node_modules/mdast-util-to-markdown": {
468
+ "version": "2.1.2",
469
+ "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-2.1.2.tgz",
470
+ "integrity": "sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==",
471
+ "license": "MIT",
472
+ "dependencies": {
473
+ "@types/mdast": "^4.0.0",
474
+ "@types/unist": "^3.0.0",
475
+ "longest-streak": "^3.0.0",
476
+ "mdast-util-phrasing": "^4.0.0",
477
+ "mdast-util-to-string": "^4.0.0",
478
+ "micromark-util-classify-character": "^2.0.0",
479
+ "micromark-util-decode-string": "^2.0.0",
480
+ "unist-util-visit": "^5.0.0",
481
+ "zwitch": "^2.0.0"
482
+ },
483
+ "funding": {
484
+ "type": "opencollective",
485
+ "url": "https://opencollective.com/unified"
486
+ }
487
+ },
488
+ "node_modules/mdast-util-to-string": {
489
+ "version": "4.0.0",
490
+ "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-4.0.0.tgz",
491
+ "integrity": "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==",
492
+ "license": "MIT",
493
+ "dependencies": {
494
+ "@types/mdast": "^4.0.0"
495
+ },
496
+ "funding": {
497
+ "type": "opencollective",
498
+ "url": "https://opencollective.com/unified"
499
+ }
500
+ },
501
+ "node_modules/micromark": {
502
+ "version": "4.0.2",
503
+ "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz",
504
+ "integrity": "sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==",
505
+ "funding": [
506
+ {
507
+ "type": "GitHub Sponsors",
508
+ "url": "https://github.com/sponsors/unifiedjs"
509
+ },
510
+ {
511
+ "type": "OpenCollective",
512
+ "url": "https://opencollective.com/unified"
513
+ }
514
+ ],
515
+ "license": "MIT",
516
+ "dependencies": {
517
+ "@types/debug": "^4.0.0",
518
+ "debug": "^4.0.0",
519
+ "decode-named-character-reference": "^1.0.0",
520
+ "devlop": "^1.0.0",
521
+ "micromark-core-commonmark": "^2.0.0",
522
+ "micromark-factory-space": "^2.0.0",
523
+ "micromark-util-character": "^2.0.0",
524
+ "micromark-util-chunked": "^2.0.0",
525
+ "micromark-util-combine-extensions": "^2.0.0",
526
+ "micromark-util-decode-numeric-character-reference": "^2.0.0",
527
+ "micromark-util-encode": "^2.0.0",
528
+ "micromark-util-normalize-identifier": "^2.0.0",
529
+ "micromark-util-resolve-all": "^2.0.0",
530
+ "micromark-util-sanitize-uri": "^2.0.0",
531
+ "micromark-util-subtokenize": "^2.0.0",
532
+ "micromark-util-symbol": "^2.0.0",
533
+ "micromark-util-types": "^2.0.0"
534
+ }
535
+ },
536
+ "node_modules/micromark-core-commonmark": {
537
+ "version": "2.0.3",
538
+ "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-2.0.3.tgz",
539
+ "integrity": "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==",
540
+ "funding": [
541
+ {
542
+ "type": "GitHub Sponsors",
543
+ "url": "https://github.com/sponsors/unifiedjs"
544
+ },
545
+ {
546
+ "type": "OpenCollective",
547
+ "url": "https://opencollective.com/unified"
548
+ }
549
+ ],
550
+ "license": "MIT",
551
+ "dependencies": {
552
+ "decode-named-character-reference": "^1.0.0",
553
+ "devlop": "^1.0.0",
554
+ "micromark-factory-destination": "^2.0.0",
555
+ "micromark-factory-label": "^2.0.0",
556
+ "micromark-factory-space": "^2.0.0",
557
+ "micromark-factory-title": "^2.0.0",
558
+ "micromark-factory-whitespace": "^2.0.0",
559
+ "micromark-util-character": "^2.0.0",
560
+ "micromark-util-chunked": "^2.0.0",
561
+ "micromark-util-classify-character": "^2.0.0",
562
+ "micromark-util-html-tag-name": "^2.0.0",
563
+ "micromark-util-normalize-identifier": "^2.0.0",
564
+ "micromark-util-resolve-all": "^2.0.0",
565
+ "micromark-util-subtokenize": "^2.0.0",
566
+ "micromark-util-symbol": "^2.0.0",
567
+ "micromark-util-types": "^2.0.0"
568
+ }
569
+ },
570
+ "node_modules/micromark-factory-destination": {
571
+ "version": "2.0.1",
572
+ "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz",
573
+ "integrity": "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==",
574
+ "funding": [
575
+ {
576
+ "type": "GitHub Sponsors",
577
+ "url": "https://github.com/sponsors/unifiedjs"
578
+ },
579
+ {
580
+ "type": "OpenCollective",
581
+ "url": "https://opencollective.com/unified"
582
+ }
583
+ ],
584
+ "license": "MIT",
585
+ "dependencies": {
586
+ "micromark-util-character": "^2.0.0",
587
+ "micromark-util-symbol": "^2.0.0",
588
+ "micromark-util-types": "^2.0.0"
589
+ }
590
+ },
591
+ "node_modules/micromark-factory-label": {
592
+ "version": "2.0.1",
593
+ "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.1.tgz",
594
+ "integrity": "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==",
595
+ "funding": [
596
+ {
597
+ "type": "GitHub Sponsors",
598
+ "url": "https://github.com/sponsors/unifiedjs"
599
+ },
600
+ {
601
+ "type": "OpenCollective",
602
+ "url": "https://opencollective.com/unified"
603
+ }
604
+ ],
605
+ "license": "MIT",
606
+ "dependencies": {
607
+ "devlop": "^1.0.0",
608
+ "micromark-util-character": "^2.0.0",
609
+ "micromark-util-symbol": "^2.0.0",
610
+ "micromark-util-types": "^2.0.0"
611
+ }
612
+ },
613
+ "node_modules/micromark-factory-space": {
614
+ "version": "2.0.1",
615
+ "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.1.tgz",
616
+ "integrity": "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==",
617
+ "funding": [
618
+ {
619
+ "type": "GitHub Sponsors",
620
+ "url": "https://github.com/sponsors/unifiedjs"
621
+ },
622
+ {
623
+ "type": "OpenCollective",
624
+ "url": "https://opencollective.com/unified"
625
+ }
626
+ ],
627
+ "license": "MIT",
628
+ "dependencies": {
629
+ "micromark-util-character": "^2.0.0",
630
+ "micromark-util-types": "^2.0.0"
631
+ }
632
+ },
633
+ "node_modules/micromark-factory-title": {
634
+ "version": "2.0.1",
635
+ "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-2.0.1.tgz",
636
+ "integrity": "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==",
637
+ "funding": [
638
+ {
639
+ "type": "GitHub Sponsors",
640
+ "url": "https://github.com/sponsors/unifiedjs"
641
+ },
642
+ {
643
+ "type": "OpenCollective",
644
+ "url": "https://opencollective.com/unified"
645
+ }
646
+ ],
647
+ "license": "MIT",
648
+ "dependencies": {
649
+ "micromark-factory-space": "^2.0.0",
650
+ "micromark-util-character": "^2.0.0",
651
+ "micromark-util-symbol": "^2.0.0",
652
+ "micromark-util-types": "^2.0.0"
653
+ }
654
+ },
655
+ "node_modules/micromark-factory-whitespace": {
656
+ "version": "2.0.1",
657
+ "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.1.tgz",
658
+ "integrity": "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==",
659
+ "funding": [
660
+ {
661
+ "type": "GitHub Sponsors",
662
+ "url": "https://github.com/sponsors/unifiedjs"
663
+ },
664
+ {
665
+ "type": "OpenCollective",
666
+ "url": "https://opencollective.com/unified"
667
+ }
668
+ ],
669
+ "license": "MIT",
670
+ "dependencies": {
671
+ "micromark-factory-space": "^2.0.0",
672
+ "micromark-util-character": "^2.0.0",
673
+ "micromark-util-symbol": "^2.0.0",
674
+ "micromark-util-types": "^2.0.0"
675
+ }
676
+ },
677
+ "node_modules/micromark-util-character": {
678
+ "version": "2.1.1",
679
+ "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.1.tgz",
680
+ "integrity": "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==",
681
+ "funding": [
682
+ {
683
+ "type": "GitHub Sponsors",
684
+ "url": "https://github.com/sponsors/unifiedjs"
685
+ },
686
+ {
687
+ "type": "OpenCollective",
688
+ "url": "https://opencollective.com/unified"
689
+ }
690
+ ],
691
+ "license": "MIT",
692
+ "dependencies": {
693
+ "micromark-util-symbol": "^2.0.0",
694
+ "micromark-util-types": "^2.0.0"
695
+ }
696
+ },
697
+ "node_modules/micromark-util-chunked": {
698
+ "version": "2.0.1",
699
+ "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-2.0.1.tgz",
700
+ "integrity": "sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==",
701
+ "funding": [
702
+ {
703
+ "type": "GitHub Sponsors",
704
+ "url": "https://github.com/sponsors/unifiedjs"
705
+ },
706
+ {
707
+ "type": "OpenCollective",
708
+ "url": "https://opencollective.com/unified"
709
+ }
710
+ ],
711
+ "license": "MIT",
712
+ "dependencies": {
713
+ "micromark-util-symbol": "^2.0.0"
714
+ }
715
+ },
716
+ "node_modules/micromark-util-classify-character": {
717
+ "version": "2.0.1",
718
+ "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-2.0.1.tgz",
719
+ "integrity": "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==",
720
+ "funding": [
721
+ {
722
+ "type": "GitHub Sponsors",
723
+ "url": "https://github.com/sponsors/unifiedjs"
724
+ },
725
+ {
726
+ "type": "OpenCollective",
727
+ "url": "https://opencollective.com/unified"
728
+ }
729
+ ],
730
+ "license": "MIT",
731
+ "dependencies": {
732
+ "micromark-util-character": "^2.0.0",
733
+ "micromark-util-symbol": "^2.0.0",
734
+ "micromark-util-types": "^2.0.0"
735
+ }
736
+ },
737
+ "node_modules/micromark-util-combine-extensions": {
738
+ "version": "2.0.1",
739
+ "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-2.0.1.tgz",
740
+ "integrity": "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==",
741
+ "funding": [
742
+ {
743
+ "type": "GitHub Sponsors",
744
+ "url": "https://github.com/sponsors/unifiedjs"
745
+ },
746
+ {
747
+ "type": "OpenCollective",
748
+ "url": "https://opencollective.com/unified"
749
+ }
750
+ ],
751
+ "license": "MIT",
752
+ "dependencies": {
753
+ "micromark-util-chunked": "^2.0.0",
754
+ "micromark-util-types": "^2.0.0"
755
+ }
756
+ },
757
+ "node_modules/micromark-util-decode-numeric-character-reference": {
758
+ "version": "2.0.2",
759
+ "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-2.0.2.tgz",
760
+ "integrity": "sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==",
761
+ "funding": [
762
+ {
763
+ "type": "GitHub Sponsors",
764
+ "url": "https://github.com/sponsors/unifiedjs"
765
+ },
766
+ {
767
+ "type": "OpenCollective",
768
+ "url": "https://opencollective.com/unified"
769
+ }
770
+ ],
771
+ "license": "MIT",
772
+ "dependencies": {
773
+ "micromark-util-symbol": "^2.0.0"
774
+ }
775
+ },
776
+ "node_modules/micromark-util-decode-string": {
777
+ "version": "2.0.1",
778
+ "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-2.0.1.tgz",
779
+ "integrity": "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==",
780
+ "funding": [
781
+ {
782
+ "type": "GitHub Sponsors",
783
+ "url": "https://github.com/sponsors/unifiedjs"
784
+ },
785
+ {
786
+ "type": "OpenCollective",
787
+ "url": "https://opencollective.com/unified"
788
+ }
789
+ ],
790
+ "license": "MIT",
791
+ "dependencies": {
792
+ "decode-named-character-reference": "^1.0.0",
793
+ "micromark-util-character": "^2.0.0",
794
+ "micromark-util-decode-numeric-character-reference": "^2.0.0",
795
+ "micromark-util-symbol": "^2.0.0"
796
+ }
797
+ },
798
+ "node_modules/micromark-util-encode": {
799
+ "version": "2.0.1",
800
+ "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.1.tgz",
801
+ "integrity": "sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==",
802
+ "funding": [
803
+ {
804
+ "type": "GitHub Sponsors",
805
+ "url": "https://github.com/sponsors/unifiedjs"
806
+ },
807
+ {
808
+ "type": "OpenCollective",
809
+ "url": "https://opencollective.com/unified"
810
+ }
811
+ ],
812
+ "license": "MIT"
813
+ },
814
+ "node_modules/micromark-util-html-tag-name": {
815
+ "version": "2.0.1",
816
+ "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-2.0.1.tgz",
817
+ "integrity": "sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==",
818
+ "funding": [
819
+ {
820
+ "type": "GitHub Sponsors",
821
+ "url": "https://github.com/sponsors/unifiedjs"
822
+ },
823
+ {
824
+ "type": "OpenCollective",
825
+ "url": "https://opencollective.com/unified"
826
+ }
827
+ ],
828
+ "license": "MIT"
829
+ },
830
+ "node_modules/micromark-util-normalize-identifier": {
831
+ "version": "2.0.1",
832
+ "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-2.0.1.tgz",
833
+ "integrity": "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==",
834
+ "funding": [
835
+ {
836
+ "type": "GitHub Sponsors",
837
+ "url": "https://github.com/sponsors/unifiedjs"
838
+ },
839
+ {
840
+ "type": "OpenCollective",
841
+ "url": "https://opencollective.com/unified"
842
+ }
843
+ ],
844
+ "license": "MIT",
845
+ "dependencies": {
846
+ "micromark-util-symbol": "^2.0.0"
847
+ }
848
+ },
849
+ "node_modules/micromark-util-resolve-all": {
850
+ "version": "2.0.1",
851
+ "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-2.0.1.tgz",
852
+ "integrity": "sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==",
853
+ "funding": [
854
+ {
855
+ "type": "GitHub Sponsors",
856
+ "url": "https://github.com/sponsors/unifiedjs"
857
+ },
858
+ {
859
+ "type": "OpenCollective",
860
+ "url": "https://opencollective.com/unified"
861
+ }
862
+ ],
863
+ "license": "MIT",
864
+ "dependencies": {
865
+ "micromark-util-types": "^2.0.0"
866
+ }
867
+ },
868
+ "node_modules/micromark-util-sanitize-uri": {
869
+ "version": "2.0.1",
870
+ "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.1.tgz",
871
+ "integrity": "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==",
872
+ "funding": [
873
+ {
874
+ "type": "GitHub Sponsors",
875
+ "url": "https://github.com/sponsors/unifiedjs"
876
+ },
877
+ {
878
+ "type": "OpenCollective",
879
+ "url": "https://opencollective.com/unified"
880
+ }
881
+ ],
882
+ "license": "MIT",
883
+ "dependencies": {
884
+ "micromark-util-character": "^2.0.0",
885
+ "micromark-util-encode": "^2.0.0",
886
+ "micromark-util-symbol": "^2.0.0"
887
+ }
888
+ },
889
+ "node_modules/micromark-util-subtokenize": {
890
+ "version": "2.1.0",
891
+ "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-2.1.0.tgz",
892
+ "integrity": "sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==",
893
+ "funding": [
894
+ {
895
+ "type": "GitHub Sponsors",
896
+ "url": "https://github.com/sponsors/unifiedjs"
897
+ },
898
+ {
899
+ "type": "OpenCollective",
900
+ "url": "https://opencollective.com/unified"
901
+ }
902
+ ],
903
+ "license": "MIT",
904
+ "dependencies": {
905
+ "devlop": "^1.0.0",
906
+ "micromark-util-chunked": "^2.0.0",
907
+ "micromark-util-symbol": "^2.0.0",
908
+ "micromark-util-types": "^2.0.0"
909
+ }
910
+ },
911
+ "node_modules/micromark-util-symbol": {
912
+ "version": "2.0.1",
913
+ "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.1.tgz",
914
+ "integrity": "sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==",
915
+ "funding": [
916
+ {
917
+ "type": "GitHub Sponsors",
918
+ "url": "https://github.com/sponsors/unifiedjs"
919
+ },
920
+ {
921
+ "type": "OpenCollective",
922
+ "url": "https://opencollective.com/unified"
923
+ }
924
+ ],
925
+ "license": "MIT"
926
+ },
927
+ "node_modules/micromark-util-types": {
928
+ "version": "2.0.2",
929
+ "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.2.tgz",
930
+ "integrity": "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA==",
931
+ "funding": [
932
+ {
933
+ "type": "GitHub Sponsors",
934
+ "url": "https://github.com/sponsors/unifiedjs"
935
+ },
936
+ {
937
+ "type": "OpenCollective",
938
+ "url": "https://opencollective.com/unified"
939
+ }
940
+ ],
941
+ "license": "MIT"
942
+ },
943
+ "node_modules/ms": {
944
+ "version": "2.1.3",
945
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
946
+ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
947
+ "license": "MIT"
948
+ },
949
+ "node_modules/parse-entities": {
950
+ "version": "4.0.2",
951
+ "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.2.tgz",
952
+ "integrity": "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==",
953
+ "license": "MIT",
954
+ "dependencies": {
955
+ "@types/unist": "^2.0.0",
956
+ "character-entities-legacy": "^3.0.0",
957
+ "character-reference-invalid": "^2.0.0",
958
+ "decode-named-character-reference": "^1.0.0",
959
+ "is-alphanumerical": "^2.0.0",
960
+ "is-decimal": "^2.0.0",
961
+ "is-hexadecimal": "^2.0.0"
962
+ },
963
+ "funding": {
964
+ "type": "github",
965
+ "url": "https://github.com/sponsors/wooorm"
966
+ }
967
+ },
968
+ "node_modules/parse-entities/node_modules/@types/unist": {
969
+ "version": "2.0.11",
970
+ "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz",
971
+ "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==",
972
+ "license": "MIT"
973
+ },
974
+ "node_modules/property-information": {
975
+ "version": "7.1.0",
976
+ "resolved": "https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz",
977
+ "integrity": "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==",
978
+ "license": "MIT",
979
+ "funding": {
980
+ "type": "github",
981
+ "url": "https://github.com/sponsors/wooorm"
982
+ }
983
+ },
984
+ "node_modules/react": {
985
+ "version": "19.2.3",
986
+ "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz",
987
+ "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==",
988
+ "license": "MIT",
989
+ "peer": true,
990
+ "engines": {
991
+ "node": ">=0.10.0"
992
+ }
993
+ },
994
+ "node_modules/react-markdown": {
995
+ "version": "10.1.0",
996
+ "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-10.1.0.tgz",
997
+ "integrity": "sha512-qKxVopLT/TyA6BX3Ue5NwabOsAzm0Q7kAPwq6L+wWDwisYs7R8vZ0nRXqq6rkueboxpkjvLGU9fWifiX/ZZFxQ==",
998
+ "license": "MIT",
999
+ "dependencies": {
1000
+ "@types/hast": "^3.0.0",
1001
+ "@types/mdast": "^4.0.0",
1002
+ "devlop": "^1.0.0",
1003
+ "hast-util-to-jsx-runtime": "^2.0.0",
1004
+ "html-url-attributes": "^3.0.0",
1005
+ "mdast-util-to-hast": "^13.0.0",
1006
+ "remark-parse": "^11.0.0",
1007
+ "remark-rehype": "^11.0.0",
1008
+ "unified": "^11.0.0",
1009
+ "unist-util-visit": "^5.0.0",
1010
+ "vfile": "^6.0.0"
1011
+ },
1012
+ "funding": {
1013
+ "type": "opencollective",
1014
+ "url": "https://opencollective.com/unified"
1015
+ },
1016
+ "peerDependencies": {
1017
+ "@types/react": ">=18",
1018
+ "react": ">=18"
1019
+ }
1020
+ },
1021
+ "node_modules/remark-parse": {
1022
+ "version": "11.0.0",
1023
+ "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz",
1024
+ "integrity": "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==",
1025
+ "license": "MIT",
1026
+ "dependencies": {
1027
+ "@types/mdast": "^4.0.0",
1028
+ "mdast-util-from-markdown": "^2.0.0",
1029
+ "micromark-util-types": "^2.0.0",
1030
+ "unified": "^11.0.0"
1031
+ },
1032
+ "funding": {
1033
+ "type": "opencollective",
1034
+ "url": "https://opencollective.com/unified"
1035
+ }
1036
+ },
1037
+ "node_modules/remark-rehype": {
1038
+ "version": "11.1.2",
1039
+ "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-11.1.2.tgz",
1040
+ "integrity": "sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw==",
1041
+ "license": "MIT",
1042
+ "dependencies": {
1043
+ "@types/hast": "^3.0.0",
1044
+ "@types/mdast": "^4.0.0",
1045
+ "mdast-util-to-hast": "^13.0.0",
1046
+ "unified": "^11.0.0",
1047
+ "vfile": "^6.0.0"
1048
+ },
1049
+ "funding": {
1050
+ "type": "opencollective",
1051
+ "url": "https://opencollective.com/unified"
1052
+ }
1053
+ },
1054
+ "node_modules/space-separated-tokens": {
1055
+ "version": "2.0.2",
1056
+ "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz",
1057
+ "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==",
1058
+ "license": "MIT",
1059
+ "funding": {
1060
+ "type": "github",
1061
+ "url": "https://github.com/sponsors/wooorm"
1062
+ }
1063
+ },
1064
+ "node_modules/stringify-entities": {
1065
+ "version": "4.0.4",
1066
+ "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz",
1067
+ "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==",
1068
+ "license": "MIT",
1069
+ "dependencies": {
1070
+ "character-entities-html4": "^2.0.0",
1071
+ "character-entities-legacy": "^3.0.0"
1072
+ },
1073
+ "funding": {
1074
+ "type": "github",
1075
+ "url": "https://github.com/sponsors/wooorm"
1076
+ }
1077
+ },
1078
+ "node_modules/style-to-js": {
1079
+ "version": "1.1.21",
1080
+ "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.21.tgz",
1081
+ "integrity": "sha512-RjQetxJrrUJLQPHbLku6U/ocGtzyjbJMP9lCNK7Ag0CNh690nSH8woqWH9u16nMjYBAok+i7JO1NP2pOy8IsPQ==",
1082
+ "license": "MIT",
1083
+ "dependencies": {
1084
+ "style-to-object": "1.0.14"
1085
+ }
1086
+ },
1087
+ "node_modules/style-to-object": {
1088
+ "version": "1.0.14",
1089
+ "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.14.tgz",
1090
+ "integrity": "sha512-LIN7rULI0jBscWQYaSswptyderlarFkjQ+t79nzty8tcIAceVomEVlLzH5VP4Cmsv6MtKhs7qaAiwlcp+Mgaxw==",
1091
+ "license": "MIT",
1092
+ "dependencies": {
1093
+ "inline-style-parser": "0.2.7"
1094
+ }
1095
+ },
1096
+ "node_modules/trim-lines": {
1097
+ "version": "3.0.1",
1098
+ "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz",
1099
+ "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==",
1100
+ "license": "MIT",
1101
+ "funding": {
1102
+ "type": "github",
1103
+ "url": "https://github.com/sponsors/wooorm"
1104
+ }
1105
+ },
1106
+ "node_modules/trough": {
1107
+ "version": "2.2.0",
1108
+ "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz",
1109
+ "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==",
1110
+ "license": "MIT",
1111
+ "funding": {
1112
+ "type": "github",
1113
+ "url": "https://github.com/sponsors/wooorm"
1114
+ }
1115
+ },
1116
+ "node_modules/unified": {
1117
+ "version": "11.0.5",
1118
+ "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz",
1119
+ "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==",
1120
+ "license": "MIT",
1121
+ "dependencies": {
1122
+ "@types/unist": "^3.0.0",
1123
+ "bail": "^2.0.0",
1124
+ "devlop": "^1.0.0",
1125
+ "extend": "^3.0.0",
1126
+ "is-plain-obj": "^4.0.0",
1127
+ "trough": "^2.0.0",
1128
+ "vfile": "^6.0.0"
1129
+ },
1130
+ "funding": {
1131
+ "type": "opencollective",
1132
+ "url": "https://opencollective.com/unified"
1133
+ }
1134
+ },
1135
+ "node_modules/unist-util-is": {
1136
+ "version": "6.0.1",
1137
+ "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.1.tgz",
1138
+ "integrity": "sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g==",
1139
+ "license": "MIT",
1140
+ "dependencies": {
1141
+ "@types/unist": "^3.0.0"
1142
+ },
1143
+ "funding": {
1144
+ "type": "opencollective",
1145
+ "url": "https://opencollective.com/unified"
1146
+ }
1147
+ },
1148
+ "node_modules/unist-util-position": {
1149
+ "version": "5.0.0",
1150
+ "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz",
1151
+ "integrity": "sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==",
1152
+ "license": "MIT",
1153
+ "dependencies": {
1154
+ "@types/unist": "^3.0.0"
1155
+ },
1156
+ "funding": {
1157
+ "type": "opencollective",
1158
+ "url": "https://opencollective.com/unified"
1159
+ }
1160
+ },
1161
+ "node_modules/unist-util-stringify-position": {
1162
+ "version": "4.0.0",
1163
+ "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz",
1164
+ "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==",
1165
+ "license": "MIT",
1166
+ "dependencies": {
1167
+ "@types/unist": "^3.0.0"
1168
+ },
1169
+ "funding": {
1170
+ "type": "opencollective",
1171
+ "url": "https://opencollective.com/unified"
1172
+ }
1173
+ },
1174
+ "node_modules/unist-util-visit": {
1175
+ "version": "5.0.0",
1176
+ "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz",
1177
+ "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==",
1178
+ "license": "MIT",
1179
+ "dependencies": {
1180
+ "@types/unist": "^3.0.0",
1181
+ "unist-util-is": "^6.0.0",
1182
+ "unist-util-visit-parents": "^6.0.0"
1183
+ },
1184
+ "funding": {
1185
+ "type": "opencollective",
1186
+ "url": "https://opencollective.com/unified"
1187
+ }
1188
+ },
1189
+ "node_modules/unist-util-visit-parents": {
1190
+ "version": "6.0.2",
1191
+ "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.2.tgz",
1192
+ "integrity": "sha512-goh1s1TBrqSqukSc8wrjwWhL0hiJxgA8m4kFxGlQ+8FYQ3C/m11FcTs4YYem7V664AhHVvgoQLk890Ssdsr2IQ==",
1193
+ "license": "MIT",
1194
+ "dependencies": {
1195
+ "@types/unist": "^3.0.0",
1196
+ "unist-util-is": "^6.0.0"
1197
+ },
1198
+ "funding": {
1199
+ "type": "opencollective",
1200
+ "url": "https://opencollective.com/unified"
1201
+ }
1202
+ },
1203
+ "node_modules/vfile": {
1204
+ "version": "6.0.3",
1205
+ "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz",
1206
+ "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==",
1207
+ "license": "MIT",
1208
+ "dependencies": {
1209
+ "@types/unist": "^3.0.0",
1210
+ "vfile-message": "^4.0.0"
1211
+ },
1212
+ "funding": {
1213
+ "type": "opencollective",
1214
+ "url": "https://opencollective.com/unified"
1215
+ }
1216
+ },
1217
+ "node_modules/vfile-message": {
1218
+ "version": "4.0.3",
1219
+ "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz",
1220
+ "integrity": "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==",
1221
+ "license": "MIT",
1222
+ "dependencies": {
1223
+ "@types/unist": "^3.0.0",
1224
+ "unist-util-stringify-position": "^4.0.0"
1225
+ },
1226
+ "funding": {
1227
+ "type": "opencollective",
1228
+ "url": "https://opencollective.com/unified"
1229
+ }
1230
+ },
1231
+ "node_modules/zwitch": {
1232
+ "version": "2.0.4",
1233
+ "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz",
1234
+ "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==",
1235
+ "license": "MIT",
1236
+ "funding": {
1237
+ "type": "github",
1238
+ "url": "https://github.com/sponsors/wooorm"
1239
+ }
1240
+ }
1241
+ }
1242
+ }
package.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "dependencies": {
3
+ "react-markdown": "^10.1.0"
4
+ }
5
+ }
processingPdf/chunker.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Questa parte si occupa di segmentare il testo delle sezioni in blocchi più piccoli (chunks)
2
+ # adatti alla ricerca vettoriale, utilizzando RecursiveCharacterTextSplitter (RCTS) di LangChain.
3
+
4
+ import logging
5
+ from typing import List, Dict
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from langchain_core.documents import Document
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class Chunker:
    """Split logical document sections into overlapping chunks for vector search.

    Wraps LangChain's ``RecursiveCharacterTextSplitter`` and tags every
    produced chunk with ``source``, ``section`` and ``chunk_id`` metadata.
    """

    # Characters of a section title that are replaced by "_" when building a chunk id.
    _ID_TRANSLATION = str.maketrans({" ": "_", "/": "_", ":": "_"})

    def __init__(self, chunk_size: int = 600, chunk_overlap: int = 100):
        """Configure the underlying text splitter.

        Args:
            chunk_size: Maximum chunk length, measured with ``len``.
            chunk_overlap: Overlap between consecutive chunks, measured with ``len``.
        """
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        # Separators are listed from coarsest (blank line) to finest (single
        # character).
        splitter_options = {
            "chunk_size": self.chunk_size,
            "chunk_overlap": self.chunk_overlap,
            "separators": ["\n\n", "\n", ". ", "! ", "? ", " ", ""],
            "length_function": len,
        }
        self.text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

    def create_chunks(self, sections: Dict[str, str], filename: str) -> List[Document]:
        """Chunk every non-empty section and attach metadata to each chunk.

        Args:
            sections: Mapping of section title to section text.
            filename: Name of the original PDF; stored as the ``source`` metadata
                and used as the prefix of every ``chunk_id``.

        Returns:
            The list of LangChain ``Document`` chunks, in section order. Sections
            whose chunking raises are logged and skipped. An empty list is
            returned when ``sections`` is empty.
        """
        if not sections:
            logger.warning("Nessuna sezione fornita per il chunking")
            return []

        collected: List[Document] = []

        for section_title, section_text in sections.items():
            # Whitespace-only sections carry no content worth indexing.
            if not section_text.strip():
                logger.debug(f"Salto sezione vuota: '{section_title}'")
                continue

            try:
                # The section text is lowercased before splitting, so chunk
                # contents are stored in lowercase.
                # NOTE(review): downstream NER runs on this lowercased text —
                # confirm that is intended.
                lowered_text = section_text.lower()

                # The splitter takes a list of texts; one section is passed at a time.
                pieces = self.text_splitter.create_documents([lowered_text])

                for position, piece in enumerate(pieces):
                    piece.metadata["source"] = filename
                    piece.metadata["section"] = section_title

                    # Chunk id = filename + normalised section title + index
                    # within the section.
                    section_slug = section_title.lower().translate(self._ID_TRANSLATION)
                    piece.metadata["chunk_id"] = f"{filename}_{section_slug}_{position}"

                    collected.append(piece)

                logger.debug(f"Sezione '{section_title}' divisa in {len(pieces)} chunk.")

            except Exception as e:
                # Best effort: a failing section must not abort the remaining ones.
                logger.error(f"Errore durante il chunking della sezione '{section_title}': {e}")

        logger.info(f"Totale {len(collected)} chunk generati.")
        return collected
processingPdf/extractor.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Questo documento incapsula la logica per l'estrazione delle NE e del testo strutturato
2
+
3
+ from gliner import GLiNER
4
+ import torch
5
+ import logging
6
+ from processingPdf.loader import get_layout_extractor, load_pdf_from_bytes
7
+ from processingPdf.logicSections import extract_logical_sections
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class PDFExtractor:
    """Read a PDF from disk and split it into logical sections."""

    def __init__(self):
        # The layout extractor is built once and reused for every file.
        self.layout_extractor = get_layout_extractor()

    def extract_sections(self, file_path: str):
        """Extract the logical sections of the PDF at ``file_path``.

        Args:
            file_path: Path of the PDF file to read.

        Returns:
            The result of ``extract_logical_sections`` for the loaded document,
            or an empty dict when loading yields a falsy document.
        """
        # The file is read as raw bytes and handed to the layout extractor.
        with open(file_path, "rb") as pdf_file:
            raw_pdf = pdf_file.read()

        parsed_doc = load_pdf_from_bytes(raw_pdf, self.layout_extractor)
        if not parsed_doc:
            return {}

        return extract_logical_sections(parsed_doc)
27
+
28
class EntityExtractor:
    """Named-entity extraction backed by a lazily loaded, shared GLiNER model."""

    # Cached model instance shared by every caller; populated on first use.
    _model = None

    # Entity labels requested from GLiNER, grouped by document domain.
    # Kept at class level so the list is built once instead of on every call.
    _LABELS = (
        # --- GENERAL & IDENTIFIERS ---
        "Person", "Organization", "Location", "Date", "Time",
        "Product", "Event", "Nationality", "Language",

        # --- BUREAUCRATIC, REGULATORY & CALLS FOR APPLICATIONS ---
        "Normative Reference",      # Law articles, decrees, clauses
        "Public Body",              # Public bodies (e.g. Ministry, European Commission)
        "Deadline",                 # Deadlines for calls, applications or payments
        "Requirement",              # Participation requirements or access criteria
        "Amount",                   # Monetary figures, scholarships, fees
        "Evaluation Criteria",      # Scoring or evaluation criteria
        "Document Type",            # E.g. ISEE, revenue stamp, degree certificate

        # --- TECHNICAL & INSTRUCTION MANUALS ---
        "Component",                # Machine parts or hardware components
        "Technical Specification",  # E.g. 220V, 50Hz, 4K resolution, rotation speed
        "Error Code",               # Error codes (e.g. E04, 404, Fault-01)
        "Safety Instruction",       # Safety warnings or hazards
        "Tool",                     # Required tools (e.g. wrench, screwdriver)
        "Operation Mode",           # Operating modes (e.g. Standby, Manual, Eco)

        # --- SCIENTIFIC, CHEMICAL & PHYSICAL ---
        "Scientific Term",          # General technical terms
        "Chemical Compound",        # Formulas and substance names (e.g. H2O, glucose)
        "Theory/Law",               # Physical laws or theories (e.g. Ohm's law, relativity)
        "Measurement Unit",         # Units of measure (e.g. Joule, Watt, nanometres)
        "Phenomenon",               # Natural phenomena or reactions (e.g. oxidation, gravity)

        # --- MEDICAL & CLINICAL ---
        "Clinical Condition",       # Diseases, pathologies or symptoms
        "Medical Parameter",        # E.g. blood glucose, blood pressure, heart rate
        "Anatomical Structure",     # Organs, bones, muscles or tissues
        "Drug/Medication",          # Drug names or active ingredients
        "Diagnostic Test",          # E.g. MRI, blood tests

        # --- ACADEMIC & SCHOOL ---
        "Academic Subject",         # Subjects (e.g. modern history, quantum physics)
        "Exam/Test Name",           # Exam or test titles (e.g. TOLC test, written exam)
        "Degree Course",            # Degree courses or diplomas
        "Bibliographic Source",     # Citations, authors or textbook titles

        # --- HISTORICAL & NARRATIVE (SCIENCE FICTION) ---
        "Historical Period",        # Eras, centuries or movements (e.g. Enlightenment)
        "Fictional Species",        # E.g. androids, aliens, imaginary species
        "Technological Concept",    # Imaginary technologies or futuristic concepts

        # --- QUANTITATIVE ---
        "Percentage",               # Percentages and rates
        "Quantity",                 # Generic non-monetary quantities
        "Distance",                 # Distances and lengths
    )

    @staticmethod
    def get_model():
        """Return the shared GLiNER model, loading it on first use.

        The model ``urchade/gliner_medium-v2.1`` is moved to CUDA when
        ``torch.cuda.is_available()`` is true, otherwise to the CPU.
        """
        if EntityExtractor._model is None:
            logger.info("Caricamento del modello GLiNER...")
            device = "cuda" if torch.cuda.is_available() else "cpu"
            EntityExtractor._model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1").to(device)
        return EntityExtractor._model

    @staticmethod
    def extract_ne(text: str):
        """Extract de-duplicated named entities from ``text``.

        Args:
            text: The text to analyse (one chunk).

        Returns:
            A list of ``{"text": ..., "label": ...}`` dicts in first-seen order.
            Entity text is stripped and lowercased; the label is uppercased with
            spaces replaced by underscores. Duplicate ``(text, label)`` pairs and
            entities whose text is empty after stripping are dropped. Empty or
            whitespace-only input yields ``[]`` without touching the model.
        """
        # Nothing to analyse: skip the (potentially expensive) model load and call.
        if not text or not text.strip():
            return []

        model = EntityExtractor.get_model()
        entities_found = model.predict_entities(text, list(EntityExtractor._LABELS), threshold=0.5)

        entities = []
        seen = set()  # (text, label) pairs already emitted for this text

        for ent in entities_found:
            text_clean = ent["text"].strip().lower()
            if not text_clean:
                # A whitespace-only match carries no usable entity name.
                continue
            label_clean = ent["label"].upper().replace(" ", "_")

            entity_key = (text_clean, label_clean)
            if entity_key in seen:
                continue

            seen.add(entity_key)
            entities.append({"text": text_clean, "label": label_clean})

        return entities
processingPdf/indexer.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Questo file è responsabile di due compiti principali: caricare il modello di embedding
2
+ #e orchestrare l'indicizzazione completa su Neo4j
3
+
4
+ import logging
5
+ import torch
6
+ from typing import List
7
+ from langchain_core.documents import Document as LangchainDocument
8
+ from sentence_transformers import SentenceTransformer
9
+ from processingPdf.extractor import EntityExtractor
10
+ from dotenv import load_dotenv
11
+ import os
12
+
13
+ from db.graph_db import GraphDB
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ load_dotenv()
18
+
19
+ #Gestisce il caricamento del modello di embedding e l'indicizzazione dei chunk in Neo4j
20
class Indexer:
    """Load the sentence-embedding model and index document chunks into Neo4j."""

    def __init__(self):
        """Load the embedding model named by the ``EMBEDDING_MODEL_NAME`` env var.

        Raises:
            ValueError: If ``EMBEDDING_MODEL_NAME`` is unset or empty.
            Exception: Any error raised while loading the model is logged and
                re-raised unchanged.
        """
        try:
            model_name = os.getenv("EMBEDDING_MODEL_NAME")
            if not model_name:
                # Fail fast with a clear message instead of handing ``None`` to
                # SentenceTransformer.
                # NOTE(review): the library appears to accept a missing name and
                # build an empty model — confirm against its API docs.
                raise ValueError("La variabile d'ambiente EMBEDDING_MODEL_NAME non è impostata.")

            device = "cuda" if torch.cuda.is_available() else "cpu"
            logger.info(f"Caricamento del modello di embedding sul dispositivo: {device}")
            self.embedding_model = SentenceTransformer(model_name, device=device)
            self.embedding_dimensions = self.embedding_model.get_sentence_embedding_dimension()
            logger.info(f"Modello di embedding '{model_name}' caricato con {self.embedding_dimensions} dimensioni.")
        except Exception as e:
            logger.error(f"Errore durante il caricamento del modello di embedding: {e}")
            raise

    def generate_embeddings(self, text: str) -> List[float]:
        """Return the embedding of ``text`` as a plain Python list.

        The model output is converted with ``.tolist()`` so it can be passed on
        to the graph database layer.
        """
        return self.embedding_model.encode(text).tolist()

    def index_chunks_to_neo4j(self, filename: str, chunks: list, user_id: str, lang: str = "it"):
        """Store chunks, their embeddings and their entities in Neo4j.

        Creates the user and document nodes and their link, ensures the vector
        index exists, then inserts every chunk with a valid embedding and links
        the entities extracted from it.

        Args:
            filename: Document name used for the document node and chunk ids.
            chunks: Chunk objects exposing ``page_content`` and ``metadata``.
            user_id: Identifier of the user who owns the document.
            lang: Currently unused; kept for interface compatibility.

        Raises:
            Exception: Any error from the graph layer or the embedding step is
                logged and re-raised. Entity-extraction errors are only logged.
        """
        if not chunks:
            logger.warning("Nessun chunk fornito per l'indicizzazione.")
            return

        graph_db = None
        try:
            # 1. Open the Neo4j connection.
            graph_db = GraphDB()

            # 2. Create/update the user and document nodes and link them.
            graph_db.create_user_node(user_id)
            graph_db.create_document_node(filename)
            graph_db.link_user_to_document(user_id, filename)

            # 3. Request the vector index (the original comment says it is
            #    created only if missing; that logic lives in GraphDB).
            graph_db.create_vector_index(
                index_name="chunk_embeddings_index",
                node_label="Chunk",
                property_name="embedding",
                vector_dimensions=self.embedding_dimensions
            )

            # 4. Insert chunks and embeddings, counting the ones actually stored.
            indexed_count = 0
            for i, chunk in enumerate(chunks):
                content = chunk.page_content
                metadata = chunk.metadata

                # Always end up with a usable chunk id, even if the chunker set none.
                chunk_id = metadata.get("chunk_id") or f"{filename}_{i}"

                embedding = self.generate_embeddings(content)

                # Skip chunks whose embedding is empty or has the wrong size, so
                # no malformed node is written.
                if not embedding or len(embedding) != self.embedding_dimensions:
                    logger.error(f"FALLIMENTO CRITICO: Ho rilevato un embedding non valido per il chunk {chunk_id}. Dimensione: {len(embedding) if embedding else 0}")
                    continue

                graph_db.add_chunk_to_document(filename, chunk_id, content, embedding, metadata)
                indexed_count += 1
                logger.debug(f"Ho indicizzato con successo il chunk '{chunk_id}' per il file '{filename}'.")

                # Entity extraction is best effort: a failure here is logged as a
                # warning and must not abort the whole pipeline.
                try:
                    entities = EntityExtractor.extract_ne(content)
                    for ent in entities:
                        graph_db.add_entity_to_chunk(ent["text"], ent["label"], chunk_id)
                except Exception as ne_e:
                    logger.warning(f"Non sono riuscito a estrarre entità per il chunk {chunk_id}: {ne_e}")

            # Report how many chunks were really stored, not how many were received.
            logger.info(f"Ho completato l'indicizzazione di {indexed_count}/{len(chunks)} chunk per il file '{filename}'.")

        except Exception as e:
            logger.error(f"Ho riscontrato un errore fatale durante l'indicizzazione in Neo4j per '{filename}': {e}")
            raise
        finally:
            # Close the connection whether indexing succeeded or failed.
            if graph_db:
                graph_db.close()

    def index_pdf(self, file_path: str, user_id: str):
        """Run the full pipeline for one PDF: extract, chunk, index.

        Args:
            file_path: Path of the PDF on disk; its base name becomes the
                document name.
            user_id: Identifier of the user who owns the document.
        """
        # Imported locally, as in the original pipeline code.
        from processingPdf.extractor import PDFExtractor
        from processingPdf.chunker import Chunker

        filename = os.path.basename(file_path)

        # 1. Extract the structured text of the PDF.
        extractor = PDFExtractor()
        sections = extractor.extract_sections(file_path)

        # 2. Split the sections into chunks.
        chunker = Chunker()
        chunks = chunker.create_chunks(sections, filename)

        # 3. Index everything into Neo4j.
        self.index_chunks_to_neo4j(filename, chunks, user_id)
processingPdf/loader.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Questa parte è responsabile dell'estrazione del testo e delle informazioni sul
2
+ #layout utilizzando spaCyLayout
3
+
4
+ import spacy
5
+ from typing import Any
6
+ from spacy_layout import spaCyLayout
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ #Configurazione del logger
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
+ #Funzione per inizializzare e restituire l'istanza di spacy con il componente spacylayout
15
+ #Ci permette di analizzare la struttura del PDF (colonne, titoli, etc.)
16
def get_layout_extractor():
    """Build and return a spaCyLayout extractor.

    The extractor wraps a blank Italian spaCy pipeline (``spacy.blank("it")``).
    """
    extractor = spaCyLayout(spacy.blank("it"))
    logger.info("Estrattore spaCyLayout per PDF inizializzato")
    return extractor
24
+
25
+ #Funzione per caricare il documento da bytes
26
def load_pdf_from_bytes(pdf_bytes: bytes, layout_extractor: Any):
    """Run the layout extractor on raw PDF bytes.

    Args:
        pdf_bytes: Content of the PDF file.
        layout_extractor: Callable invoked with ``pdf_bytes``.

    Returns:
        Whatever ``layout_extractor`` returns, or ``None`` when it raises; the
        error is logged instead of being propagated.
    """
    try:
        parsed = layout_extractor(pdf_bytes)
    except Exception as e:
        logger.error(f"Errore durante il caricamento o l'estrazione del PDF: {e}")
        return None
    else:
        logger.info("Estrazione del layout del PDF completata")
        return parsed
processingPdf/logicSections.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Questa parte si occupa di suddividere il documento processato in sezioni logiche
2
+ #(capitoli, sottosezioni, etc.) sfruttando le etichette di layout.
3
+
4
+ import logging
5
+ from typing import Dict, Any
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ #Suddivide il documento spaCy in sezioni logiche basandosi sulle etichette di layout. Il "doc" è il documento spaCy processato da load_pdf_from_bytes
10
+ #Ritorna un dizionario dove la chiave è il titolo della sezione e il valore è il testo associato
11
def extract_logical_sections(doc: Any) -> Dict[str, Any]:
    """Split a layout-annotated document into titled sections.

    Walks ``doc.spans["layout"]`` in order and groups span texts under the most
    recent section title.

    Args:
        doc: Document object exposing ``spans`` (with a ``"layout"`` group whose
            items have ``label_`` and ``text``) and ``text``.

    Returns:
        A dict mapping section title to section text (span texts joined with
        newlines), with empty sections removed. When there are no layout spans,
        or every section ends up empty, the whole ``doc.text`` is returned under
        the key ``"documento_completo"``. An empty dict is returned when the
        document has no layout spans and no text.
    """
    # Labels that open a NEW logical section.
    SECTION_LABELS = ("SECTION_HEADER", "TITLE", "BOLD", "BOLD_CAPTION")

    # Labels treated as body text. "LIST_ITEM" is included because it is the
    # label layout extractors emit for list entries; with only "LIST" every
    # list item was silently dropped.
    CONTENT_LABELS = ("TEXT", "LIST", "LIST_ITEM", "PARAGRAPH")

    # Caption labels and the title prefix used for the section they open.
    CAPTION_PREFIXES = {"TABLE_CAPTION": "tabella", "FIGURE_CAPTION": "figura"}

    layout_spans = doc.spans.get("layout") if hasattr(doc, 'spans') else None
    if not layout_spans:
        if hasattr(doc, 'text') and doc.text.strip():
            logger.warning("Nessun layout distinto trovato, restituisco il documento completo come sezione unica.")
            return {"documento_completo": doc.text.strip()}
        return {}

    # Placeholder title for content that appears before any header.
    current_title = "preambolo_documento"
    # Section title -> list of span texts; joined once at the end.
    parts = {current_title: []}

    for span in layout_spans:
        label = span.label_.upper()
        span_text = span.text.strip()

        if not span_text:
            continue

        # 1. Section headers: open a new section or extend the current one.
        if label in SECTION_LABELS:
            potential_title = span_text.lower()
            # Only titles that are long enough and not yet used open a section.
            if len(potential_title) > 3 and potential_title not in parts:
                current_title = potential_title
                parts[current_title] = []
            else:
                # Otherwise keep the text under the current section (useful for
                # multi-line titles).
                parts[current_title].append(span_text)

        # 2. Table/figure captions open a caption-named section.
        # NOTE(review): current_title is reassigned here and in step 4, so body
        # text that follows is filed under the table/figure section until the
        # next header — confirm this is intended.
        elif label in CAPTION_PREFIXES:
            current_title = f"{CAPTION_PREFIXES[label]}: {span_text.lower()[:100]}"
            if current_title not in parts:
                parts[current_title] = [span_text]

        # 3. Body content goes under the current section (or the preamble).
        elif label in CONTENT_LABELS:
            parts[current_title].append(span_text)

        # 4. Tables/figures without a caption: use a generic title so their text
        #    is not lost.
        elif label in ("TABLE", "FIGURE"):
            if "tabella:" not in current_title and "figura:" not in current_title:
                current_title = f"blocco_generico_{label.lower()}"
            parts.setdefault(current_title, []).append(span_text)

    # Span texts are already stripped and non-empty, so joining with newlines
    # gives the trimmed section text; sections with no content are dropped.
    cleaned_sections = {title: "\n".join(texts) for title, texts in parts.items() if texts}

    # Fallback for very noisy documents or unconventional layouts.
    if not cleaned_sections and hasattr(doc, 'text') and doc.text.strip():
        logger.warning("Suddivisione per layout fallita, ritorno il documento completo come sezione unica.")
        return {"documento_completo": doc.text.strip()}

    logger.info(f"Documento suddiviso in {len(cleaned_sections)} sezioni logiche.")
    return cleaned_sections
processingPdf/reranker.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import logging
3
+ from sentence_transformers import CrossEncoder
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
class Reranker:
    """Re-rank retrieved chunks against a query with a cross-encoder."""

    def __init__(self, model_name="BAAI/bge-reranker-v2-m3"):
        """Load the cross-encoder on CUDA when available, otherwise on the CPU.

        Args:
            model_name: Name of the cross-encoder model to load.

        Raises:
            Exception: Loading errors are logged and re-raised unchanged.
        """
        try:
            target_device = "cuda" if torch.cuda.is_available() else "cpu"
            # The cross-encoder receives (question, chunk) pairs and returns a score.
            self.model = CrossEncoder(model_name, device=target_device)
            logger.info(f"Re-ranker '{model_name}' caricato con successo.")
        except Exception as e:
            logger.error(f"Errore nel caricamento del Re-ranker: {e}")
            raise

    def rerank(self, query: str, documents: list, top_n: int = 5):
        """Return the ``top_n`` documents most relevant to ``query``.

        Args:
            query: The user question.
            documents: Candidate chunks to score.
            top_n: Maximum number of documents to return.

        Returns:
            The best-scoring documents, highest score first; documents with equal
            scores keep their input order. An empty list when ``documents`` is
            empty.
        """
        if not documents:
            return []

        # One [query, document] pair per candidate, as expected by the model.
        query_doc_pairs = [[query, candidate] for candidate in documents]
        relevance = self.model.predict(query_doc_pairs)

        # Attach each score to its document, then order by score descending.
        ranked = list(zip(relevance, documents))
        ranked.sort(key=lambda item: item[0], reverse=True)

        return [candidate for _, candidate in ranked[:top_n]]
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ gliner
3
+ groq
4
+ hf_xet
5
+ huggingface_hub
6
+ langchain-core
7
+ langchain-text-splitters
8
+ langgraph
9
+ mistralai
10
+ neo4j
11
+ python-dotenv
12
+ python-multipart
13
+ sentence-transformers
14
+ spacy
15
+ spacy-layout
16
+ torch
17
+ uvicorn