klydekushy committed on
Commit
d9e60f8
·
verified ·
1 Parent(s): 7dd1fef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -113
app.py CHANGED
@@ -1,158 +1,152 @@
1
  import streamlit as st
 
2
  import os
 
 
3
  from pathlib import Path
4
  from core.docling_engine import IngestionEngine
5
  from core.extractor import ExtractorEngine
6
 
 
7
  #Link for the app : https://klydekushy-ocr-prospectus.hf.space/
 
 
 
 
 
 
 
 
 
 
def check_password():
    """Gate the app behind a shared access code.

    Shows a login form until the correct code has been entered during the
    current session. The expected code is read from the ``VERITAS_PASSWORD``
    environment variable instead of being written in the source file.

    Returns:
        bool: True once ``st.session_state.password_correct`` has been set,
        False while the login form is still displayed.
    """
    import hmac  # local import: only used for the constant-time comparison

    if "password_correct" not in st.session_state:
        st.session_state.password_correct = False
    if st.session_state.password_correct:
        return True

    expected = os.environ.get("VERITAS_PASSWORD", "")

    # Login form
    st.title("🔐 Accès Restreint - Veritas")
    password = st.text_input("Veuillez saisir le code d'accès", type="password")
    if st.button("Se connecter"):
        if not expected:
            # Fail closed: with no configured code, nobody gets in.
            st.error("Code d'accès non configuré (variable VERITAS_PASSWORD).")
        elif hmac.compare_digest(password.encode("utf-8"), expected.encode("utf-8")):
            st.session_state.password_correct = True
            st.rerun()
        else:
            st.error("Mot de passe incorrect")
    return False


# Check the access code before rendering anything else.
if not check_password():
    st.stop()
30
 
31
-
32
# --- PAGE CONFIGURATION ---
# NOTE(review): Streamlit's docs describe set_page_config as the first st.*
# call of a page; in this version the password gate above renders first —
# confirm this ordering is accepted by the Streamlit version in use.
page_settings = {
    "page_title": "PrõspectusVéritas | Intelligence Platform",
    "page_icon": "🔵",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)
39
-
40
# --- "GOTHAM STYLE" CSS AND SPACE GROTESK FONT ---
# The stylesheet is injected as raw HTML (unsafe_allow_html=True) so that the
# <style> element reaches the page. The last rule resets the font on sidebar
# and expander buttons so their icons keep rendering; its second selector was
# missing the closing quote of the attribute value, which is fixed here.
st.markdown("""
<style>
    /* 1. Importation de la police Google Font */
    @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;700&display=swap');

    /* 2. Application de la police au corps de l'application et aux textes */
    /* On cible uniquement les balises générales et les textes, pas les éléments système */
    html, body, .stApp, h1, h2, h3, h4, .stText, .stMarkdown, .stTextInput, .stTextArea {
        font-family: 'Space Grotesk', sans-serif !important;
    }

    /* Le reste du CSS GOTHAM pour l'esthétique */

    /* Fond global */
    .stApp {
        background-color: #0b0d11;
    }
    /* Sidebar */
    [data-testid="stSidebar"] {
        background-color: #12151e;
        border-right: 1px solid #30363d;
    }
    /* Métriques et cartes */
    div[data-testid="stMetricValue"] {
        font-size: 24px;
        color: #29b5e8;
    }
    div[data-testid="metric-container"] {
        background-color: #1c2128;
        border: 1px solid #30363d;
        padding: 15px;
        border-radius: 4px;
    }
    /* Boutons */
    .stButton>button {
        background-color: #29b5e8;
        color: white;
        border: none;
        border-radius: 0px;
        text-transform: uppercase;
        font-weight: bold;
        letter-spacing: 1px;
    }
    /* Headers */
    h1, h2, h3 {
        color: #e6edf3;
        font-weight: 300;
        text-transform: uppercase;
    }
    /* Correction spécifique des éléments d'interface Streamlit (pour conserver les icônes) */
    /* On s'assure que les icônes gardent leur police par défaut (Material Icons) */
    [data-testid="stSidebar"] button,
    [data-testid="stExpander"] button,
    [data-testid="stSidebarHeader"] button {
        font-family: sans-serif !important;
    }
</style>
""", unsafe_allow_html=True)
99
 
 
 
 
 
 
100
 
101
# Build each engine only when its key is missing from the session state, so an
# existing instance is reused on later script runs of the same session.
for state_key, engine_factory in (
    ("ingestor", IngestionEngine),
    ("extractor", ExtractorEngine),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = engine_factory()

st.title("Prõspectus Veritas - V1")
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
# Working directories: raw uploads and the JSON produced from them.
# Both are created when missing; an existing directory is left untouched.
UPLOAD_DIR = Path("uploads")
JSON_DIR = Path("json_output")
for work_dir in (UPLOAD_DIR, JSON_DIR):
    work_dir.mkdir(exist_ok=True)
 
114
 
115
col1, col2 = st.columns([1, 2])

# Left column: upload documents, save them under UPLOAD_DIR and run the
# ingestion engine on each one, reporting the outcome per file.
with col1:
    st.header("1. Ingestion")
    uploaded_files = st.file_uploader("Charger des documents (PDF, Image)", accept_multiple_files=True)

    if st.button("Lancer l'analyse"):
        for uploaded_file in uploaded_files or []:
            # The name comes from the client: keep only its last path
            # component so the file cannot be written outside UPLOAD_DIR.
            safe_name = Path(uploaded_file.name).name
            if safe_name in ("", ".", ".."):
                st.error(f"🔴 Erreur sur {uploaded_file.name}: nom de fichier invalide")
                continue

            file_path = UPLOAD_DIR / safe_name
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())

            with st.spinner(f"Lecture de {uploaded_file.name}..."):
                result = st.session_state.ingestor.process_document(file_path, JSON_DIR)
                if result["status"] == "success":
                    st.success(result["message"])
                else:
                    st.error(f"🔴 Erreur sur {uploaded_file.name}: {result['message']}")
133
-
134
# Right column: pick one JSON result, preview its text and, on demand, send
# that text to the extractor to obtain a knowledge graph.
with col2:
    st.header("2. Analyse & Graphe")
    json_files = list(JSON_DIR.glob("*.json"))

    if json_files:
        import json  # imported here: this version has no top-level json import

        selected_json = st.selectbox(
            "Sélectionner un document analysé",
            json_files,
            format_func=lambda candidate: candidate.name,
        )

        with open(selected_json, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Text handed to the model: every "text" field of the "texts" list,
        # joined with single spaces (empty when the key is absent).
        if "texts" in data:
            text_to_analyze = " ".join(entry.get("text", "") for entry in data["texts"])
        else:
            text_to_analyze = ""

        # Only the first 2000 characters are previewed; the full text is sent.
        st.text_area("Texte extrait par l'OCR", text_to_analyze[:2000], height=200)

        if st.button("Extraire le Graphe de Connaissance"):
            with st.spinner("Appel du cerveau Qwen (Timeout 3min)..."):
                graph_raw = st.session_state.extractor.extract_graph(text_to_analyze)
                if not graph_raw:
                    st.error("🔴 Le cerveau n'a pas répondu à temps ou le format est invalide.")
                else:
                    st.session_state.last_graph = graph_raw
                    st.json(graph_raw)
 
1
  import streamlit as st
2
+ import pandas as pd
3
  import os
4
+ import json
5
+ import time
6
  from pathlib import Path
7
  from core.docling_engine import IngestionEngine
8
  from core.extractor import ExtractorEngine
9
 
10
+
11
  #Link for the app : https://klydekushy-ocr-prospectus.hf.space/
12
+
13
# --- PAGE CONFIGURATION ---
# Placed before the password gate, so this is the first Streamlit call made.
page_settings = {
    "page_title": "PrõspectusVéritas | Intelligence Platform",
    "page_icon": "🔵",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)
20
+
21
# --- PASSWORD GATE ---
def check_password():
    """Gate the app behind a shared access code.

    Shows a login form until the correct code has been entered during the
    current session. The expected code is read from the ``VERITAS_PASSWORD``
    environment variable instead of being written in the source file.

    Returns:
        bool: True once ``st.session_state.password_correct`` has been set,
        False while the login form is still displayed.
    """
    import hmac  # local import: only used for the constant-time comparison

    if "password_correct" not in st.session_state:
        st.session_state.password_correct = False
    if st.session_state.password_correct:
        return True

    expected = os.environ.get("VERITAS_PASSWORD", "")

    st.title("🔐 Accès Restreint - Veritas")
    password = st.text_input("Veuillez saisir le code d'accès", type="password")
    if st.button("Se connecter"):
        if not expected:
            # Fail closed: with no configured code, nobody gets in.
            st.error("Code d'accès non configuré (variable VERITAS_PASSWORD).")
        elif hmac.compare_digest(password.encode("utf-8"), expected.encode("utf-8")):
            st.session_state.password_correct = True
            st.rerun()
        else:
            st.error("Mot de passe incorrect")
    return False


# Nothing below runs until the visitor has authenticated.
if not check_password():
    st.stop()
40
 
41
# --- "GOTHAM STYLE" CSS ---
# Stylesheet kept in a named constant and injected as raw HTML so the
# <style> element reaches the page.
GOTHAM_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;700&display=swap');
html, body, .stApp, h1, h2, h3, h4, .stText, .stMarkdown, .stTextInput, .stTextArea {
    font-family: 'Space Grotesk', sans-serif !important;
}
.stApp { background-color: #0b0d11; }
[data-testid="stSidebar"] { background-color: #12151e; border-right: 1px solid #30363d; }
div[data-testid="stMetricValue"] { font-size: 24px; color: #29b5e8; }
div[data-testid="metric-container"] { background-color: #1c2128; border: 1px solid #30363d; padding: 15px; border-radius: 4px; }
.stButton>button { background-color: #29b5e8; color: white; border-radius: 0px; text-transform: uppercase; font-weight: bold; }
h1, h2, h3 { color: #e6edf3; font-weight: 300; text-transform: uppercase; }
</style>
"""
st.markdown(GOTHAM_CSS, unsafe_allow_html=True)
56
 
57
# --- INITIALISATION ---
# Working directories: saved inputs and the JSON produced from them.
# Both are created when missing; an existing directory is left untouched.
INPUT_DIR = Path("input_data")
OUTPUT_DIR = Path("output_json")
for work_dir in (INPUT_DIR, OUTPUT_DIR):
    work_dir.mkdir(exist_ok=True)

# Build each engine only when its key is missing from the session state, so an
# existing instance is reused on later script runs of the same session.
for state_key, engine_factory in (
    ("engine", IngestionEngine),
    ("extractor", ExtractorEngine),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = engine_factory()
67
 
68
# --- SIDEBAR ---
# Title, a purge button that empties both working directories, and two
# checkboxes whose values are not read anywhere in this script.
with st.sidebar:
    st.title("PrõspectùsV-ritas")
    st.markdown("---")
    if st.button("🗑️ PURGER TOUS LES FICHIERS"):
        for work_dir in (INPUT_DIR, OUTPUT_DIR):
            for entry in work_dir.glob("*"):
                # Only regular files are deleted; removing a directory
                # entry with a file-removal call would raise.
                if entry.is_file():
                    entry.unlink()
        # st.rerun() restarts the script at once, so a message rendered
        # before it would be discarded. Leave a flag and show the message
        # on the next run instead.
        st.session_state.purge_notice = True
        st.rerun()
    if st.session_state.pop("purge_notice", False):
        st.success("Système nettoyé.")
    st.markdown("---")
    st.caption("PARAMÈTRES SYSTÈME")
    st.checkbox("OCR Amélioré", value=True)
    st.checkbox("Extraction Entités", value=True)
81
 
82
# --- DASHBOARD HEADER ---
# Four metric cards in a row. Only the first is computed (the number of JSON
# files in OUTPUT_DIR); the other three are fixed display strings.
col1, col2, col3, col4 = st.columns(4)
ingested_count = len(list(OUTPUT_DIR.glob("*.json")))
metric_cards = (
    (col1, "Documents Ingestés", ingested_count),
    (col2, "Statut Système", "DOCKER-HF"),
    (col3, "Moteur OCR", "Docling v2"),
    (col4, "Confiance IA", "98.4%"),
)
for column, label, value in metric_cards:
    column.metric(label, value)

st.markdown("---")
90
 
91
# --- TAB NAVIGATION ---
tab_ingestion, tab_intelligence = st.tabs(["📥 INGESTION & OCR", "🧠 INTELLIGENCE & GRAPHES"])


def _ingestion_failure(result):
    """Return an error message when *result* reports a failure, else None.

    Assumes process_document returns a dict with "status" and "message" keys,
    as the previous version of this script consumed it — TODO confirm against
    core.docling_engine. Any other return value is treated as success.
    """
    if isinstance(result, dict) and result.get("status") != "success":
        return str(result.get("message", ""))
    return None


# --- TAB 1: INGESTION ---
# Left: upload files and run the ingestion engine on each.
# Right: paste free text, save it as a Markdown file and ingest that.
with tab_ingestion:
    col_u1, col_u2 = st.columns(2)

    with col_u1:
        st.subheader("◯⎯| CHARGEMENT DOCUMENTS")
        uploaded_files = st.file_uploader("Fichiers PDF/IMG", accept_multiple_files=True)
        if uploaded_files and st.button("INITIER LA SÉQUENCE OCR"):
            failures = []
            for uploaded_file in uploaded_files:
                # The name comes from the client: keep only its last path
                # component so the file cannot be written outside INPUT_DIR.
                safe_name = Path(uploaded_file.name).name
                if safe_name in ("", ".", ".."):
                    failures.append(f"{uploaded_file.name}: nom de fichier invalide")
                    continue
                file_path = INPUT_DIR / safe_name
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                with st.spinner(f"Traitement: {uploaded_file.name}"):
                    result = st.session_state.engine.process_document(file_path, OUTPUT_DIR)
                problem = _ingestion_failure(result)
                if problem is not None:
                    failures.append(f"{uploaded_file.name}: {problem}")
            if failures:
                # No rerun here: a rerun would wipe the errors off the page.
                for failure in failures:
                    st.error(f"🔴 Erreur sur {failure}")
            else:
                st.success("Traitement terminé.")
                st.rerun()

    with col_u2:
        st.subheader("◯⎯| TEXTE LIBRE")
        free_text = st.text_area("Coller du texte ici", height=150)
        if st.button("ANALYSER TEXTE"):
            if not free_text.strip():
                # Nothing to ingest: avoid writing and processing an empty file.
                st.warning("Aucun texte à analyser.")
            else:
                temp_path = INPUT_DIR / f"text_{int(time.time())}.md"
                with open(temp_path, "w", encoding="utf-8") as f:
                    f.write(free_text)
                result = st.session_state.engine.process_document(temp_path, OUTPUT_DIR)
                problem = _ingestion_failure(result)
                if problem is not None:
                    st.error(f"🔴 Erreur sur {temp_path.name}: {problem}")
                else:
                    st.rerun()
119
+
120
# --- TAB 2: INTELLIGENCE ---
# Pick one JSON artefact, show its text and, on demand, send that text to the
# extractor to obtain a graph.
with tab_intelligence:
    # Sorted so the selectbox options keep the same order from one script run
    # to the next (glob order is not guaranteed).
    json_files = sorted(OUTPUT_DIR.glob("*.json"))
    if not json_files:
        st.info("Aucun document analysé disponible. Allez dans l'onglet INGESTION.")
    else:
        selected_file = st.selectbox("Sélectionner un artefact", json_files, format_func=lambda x: x.name)

        # Delete button for the selected artefact only.
        if st.button("❌ Supprimer cet artefact"):
            # missing_ok: the file may already have been removed elsewhere.
            selected_file.unlink(missing_ok=True)
            st.rerun()

        col_inf1, col_inf2 = st.columns([1, 1])

        # A file that cannot be read or parsed is reported on the page
        # instead of crashing the whole script.
        try:
            with open(selected_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError) as exc:
            st.error(f"Impossible de lire {selected_file.name} : {exc}")
            data = {}

        # Join the "text" field of every entry of "texts"; anything that does
        # not have the expected dict shape is skipped.
        entries = data.get("texts", []) if isinstance(data, dict) else []
        if not isinstance(entries, list):
            entries = []
        text_extracted = " ".join(
            str(entry.get("text") or "") for entry in entries if isinstance(entry, dict)
        )

        with col_inf1:
            st.markdown("### Contenu Extrait")
            st.text_area("Données OCR", text_extracted, height=400)

        with col_inf2:
            st.markdown("### Analyse de Graphe")
            if st.button("LANCER L'EXTRACTION IA"):
                with st.spinner("Le cerveau Qwen analyse les données..."):
                    graph_data = st.session_state.extractor.extract_graph(text_extracted)
                    if graph_data:
                        st.session_state.last_graph = graph_data
                        st.json(graph_data)
                    else:
                        st.error("Erreur de réponse de l'API (Timeout).")