Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,7 +96,7 @@ col4.metric("Confiance IA", "98.4%")
|
|
| 96 |
st.markdown("---")
|
| 97 |
|
| 98 |
# --- NAVIGATION PAR ONGLETS (STYLE PALANTIR) ---
|
| 99 |
-
tab_ingestion, tab_intelligence = st.tabs(["INGESTION & OCR", "ENTITES & RELATIONS"])
|
| 100 |
|
| 101 |
# --- TAB 1: INGESTION ---
|
| 102 |
with tab_ingestion:
|
|
@@ -105,7 +105,7 @@ with tab_ingestion:
|
|
| 105 |
with col_u1:
|
| 106 |
st.subheader("◯⎯| CHARGEMENT DOCUMENTS")
|
| 107 |
uploaded_files = st.file_uploader("Fichiers PDF/IMG", accept_multiple_files=True)
|
| 108 |
-
if uploaded_files and st.button("INITIER LA SÉQUENCE
|
| 109 |
for uploaded_file in uploaded_files:
|
| 110 |
file_path = INPUT_DIR / uploaded_file.name
|
| 111 |
with open(file_path, "wb") as f:
|
|
@@ -118,16 +118,15 @@ with tab_ingestion:
|
|
| 118 |
with col_u2:
|
| 119 |
st.subheader("◯⎯| TEXTE LIBRE")
|
| 120 |
free_text = st.text_area("Coller du texte ici", height=150)
|
| 121 |
-
if st.button("
|
| 122 |
temp_path = INPUT_DIR / f"text_{int(time.time())}.md"
|
| 123 |
with open(temp_path, "w", encoding="utf-8") as f: f.write(free_text)
|
| 124 |
st.session_state.engine.process_document(temp_path, OUTPUT_DIR)
|
| 125 |
st.rerun()
|
| 126 |
|
| 127 |
|
| 128 |
-
# --- TAB 2:
|
| 129 |
-
with
|
| 130 |
-
# 1. Vérification des fichiers disponibles
|
| 131 |
json_files = list(OUTPUT_DIR.glob("*.json"))
|
| 132 |
if not json_files:
|
| 133 |
st.info("Aucun document analysé disponible. Allez dans l'onglet INGESTION.")
|
|
@@ -135,101 +134,103 @@ with tab_intelligence:
|
|
| 135 |
# Barre d'outils (Sélection et Suppression)
|
| 136 |
col_select, col_delete = st.columns([3, 1])
|
| 137 |
with col_select:
|
| 138 |
-
selected_file = st.selectbox("Sélectionner un artefact", json_files, format_func=lambda x: x.name)
|
| 139 |
with col_delete:
|
| 140 |
st.write("")
|
| 141 |
-
if st.button("SUPPRIMER", use_container_width=True):
|
| 142 |
os.remove(selected_file)
|
| 143 |
st.rerun()
|
| 144 |
|
| 145 |
st.markdown("---")
|
| 146 |
|
| 147 |
-
#
|
| 148 |
with open(selected_file, 'r', encoding='utf-8') as f:
|
| 149 |
data = json.load(f)
|
| 150 |
text_extracted = " ".join([t.get("text", "") for t in data.get("texts", [])])
|
| 151 |
|
| 152 |
-
col_inf1, col_inf2 = st.columns([1,
|
| 153 |
|
| 154 |
with col_inf1:
|
| 155 |
st.markdown("### TEXTE SOURCE")
|
| 156 |
st.text_area("Données issues de l'OCR", text_extracted, height=500)
|
| 157 |
|
| 158 |
with col_inf2:
|
| 159 |
-
st.markdown("###
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
with st.spinner("Analyse fragmentée et colorisation dynamique en cours..."):
|
| 164 |
-
# ia_temp est récupéré depuis le slider de votre sidebar
|
| 165 |
graph_data = st.session_state.extractor.extract_long_text(text_extracted, temperature=ia_temp)
|
| 166 |
|
| 167 |
if graph_data:
|
| 168 |
st.session_state.last_graph = graph_data
|
| 169 |
-
st.success(f"
|
| 170 |
else:
|
| 171 |
st.error("L'IA n'a pas pu structurer les données.")
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
|
|
|
|
| 96 |
st.markdown("---")
|
| 97 |
|
| 98 |
# --- NAVIGATION PAR ONGLETS (STYLE PALANTIR) ---
|
| 99 |
+
# --- TAB NAVIGATION (PALANTIR STYLE) ---
# Three labels → three handles. The later `with tab_entities:` and
# `with tab_visualisation:` blocks require all three names to be bound here;
# unpacking only two from three tabs would raise ValueError.
tab_ingestion, tab_entities, tab_visualisation = st.tabs(
    ["INGESTION & OCR", "ENTITES & RELATIONS", "VISUALISATION GRAPHE"]
)
|
| 100 |
|
| 101 |
# --- TAB 1: INGESTION ---
|
| 102 |
with tab_ingestion:
|
|
|
|
| 105 |
with col_u1:
|
| 106 |
st.subheader("◯⎯| CHARGEMENT DOCUMENTS")
|
| 107 |
uploaded_files = st.file_uploader("Fichiers PDF/IMG", accept_multiple_files=True)
|
| 108 |
+
if uploaded_files and st.button("INITIER LA SÉQUENCE"):
|
| 109 |
for uploaded_file in uploaded_files:
|
| 110 |
file_path = INPUT_DIR / uploaded_file.name
|
| 111 |
with open(file_path, "wb") as f:
|
|
|
|
| 118 |
with col_u2:
|
| 119 |
st.subheader("◯⎯| TEXTE LIBRE")
|
| 120 |
free_text = st.text_area("Coller du texte ici", height=150)
|
| 121 |
+
if st.button("INITIER LA SÉQUENCE"):
|
| 122 |
temp_path = INPUT_DIR / f"text_{int(time.time())}.md"
|
| 123 |
with open(temp_path, "w", encoding="utf-8") as f: f.write(free_text)
|
| 124 |
st.session_state.engine.process_document(temp_path, OUTPUT_DIR)
|
| 125 |
st.rerun()
|
| 126 |
|
| 127 |
|
| 128 |
+
# --- TAB 2: ENTITÉS & RELATIONS ---
|
| 129 |
+
with tab_entities:
|
|
|
|
| 130 |
json_files = list(OUTPUT_DIR.glob("*.json"))
|
| 131 |
if not json_files:
|
| 132 |
st.info("Aucun document analysé disponible. Allez dans l'onglet INGESTION.")
|
|
|
|
| 134 |
# Barre d'outils (Sélection et Suppression)
|
| 135 |
col_select, col_delete = st.columns([3, 1])
|
| 136 |
with col_select:
|
| 137 |
+
selected_file = st.selectbox("Sélectionner un artefact", json_files, format_func=lambda x: x.name, key="select_entity")
|
| 138 |
with col_delete:
|
| 139 |
st.write("")
|
| 140 |
+
if st.button("SUPPRIMER", key="del_entity", use_container_width=True):
|
| 141 |
os.remove(selected_file)
|
| 142 |
st.rerun()
|
| 143 |
|
| 144 |
st.markdown("---")
|
| 145 |
|
| 146 |
+
# Chargement du texte extrait
|
| 147 |
with open(selected_file, 'r', encoding='utf-8') as f:
|
| 148 |
data = json.load(f)
|
| 149 |
text_extracted = " ".join([t.get("text", "") for t in data.get("texts", [])])
|
| 150 |
|
| 151 |
+
col_inf1, col_inf2 = st.columns([1, 1])
|
| 152 |
|
| 153 |
with col_inf1:
|
| 154 |
st.markdown("### TEXTE SOURCE")
|
| 155 |
st.text_area("Données issues de l'OCR", text_extracted, height=500)
|
| 156 |
|
| 157 |
with col_inf2:
|
| 158 |
+
st.markdown("### EXTRACTION DES DONNÉES")
|
| 159 |
+
if st.button("GÉNÉRER LES ENTITÉS & RELATIONS", use_container_width=True):
|
| 160 |
+
with st.spinner("Analyse fragmentée par le cerveau Qwen..."):
|
| 161 |
+
# ia_temp vient du slider de votre sidebar
|
|
|
|
|
|
|
| 162 |
graph_data = st.session_state.extractor.extract_long_text(text_extracted, temperature=ia_temp)
|
| 163 |
|
| 164 |
if graph_data:
|
| 165 |
st.session_state.last_graph = graph_data
|
| 166 |
+
st.success(f"Extraction réussie : {len(graph_data.get('entities', []))} entités trouvées.")
|
| 167 |
else:
|
| 168 |
st.error("L'IA n'a pas pu structurer les données.")
|
| 169 |
|
| 170 |
+
if 'last_graph' in st.session_state:
|
| 171 |
+
st.markdown("#### FORMAT JSON (BRUT)")
|
| 172 |
+
# Affiche uniquement le JSON comme demandé
|
| 173 |
+
st.json(st.session_state.last_graph)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# --- TAB 3: VISUALISATION GRAPHE ---
|
| 177 |
+
with tab_visualisation:
|
| 178 |
+
st.subheader("◯⎯| INTERFACE CINÉTIQUE VISUELLE")
|
| 179 |
+
|
| 180 |
+
if 'last_graph' in st.session_state and st.session_state.last_graph:
|
| 181 |
+
try:
|
| 182 |
+
# Initialisation du graphe PyVis
|
| 183 |
+
net = Network(height="700px", width="100%", bgcolor="#0b0d11", font_color="#e6edf3", directed=True)
|
| 184 |
+
|
| 185 |
+
import hashlib

def auto_color(text):
    """Map a label to a deterministic hex colour (case-insensitive).

    The first six hex digits of the MD5 of the lower-cased label are used,
    so the same label always yields the same colour across reruns.
    """
    digest = hashlib.md5(text.lower().encode()).hexdigest()
    return "#" + digest[:6]
|
| 189 |
+
|
| 190 |
+
found_types = {}
|
| 191 |
+
|
| 192 |
+
# Ajout des Noeuds
|
| 193 |
+
# Add one PyVis node per extracted entity, coloured by entity type.
# NOTE(review): assumes each entity dict carries "id" and "name" — entries
# missing either will raise KeyError into the surrounding try/except.
for ent in st.session_state.last_graph.get("entities", []):
    e_type = ent.get("type", "Unknown")
    e_color = auto_color(e_type)
    found_types[e_type] = e_color  # remember type→colour for the legend

    net.add_node(
        ent["id"],
        label=ent["name"],
        # default "" so a missing description doesn't render as the
        # literal string "None" in the hover tooltip
        title=f"TYPE: {e_type}\n{ent.get('description', '')}",
        color=e_color,
        shape="dot",
        size=25,
    )
|
| 206 |
+
|
| 207 |
+
# Ajout des Relations
|
| 208 |
+
for rel in st.session_state.last_graph.get("relationships", []):
|
| 209 |
+
net.add_edge(
|
| 210 |
+
rel["from"],
|
| 211 |
+
rel["to"],
|
| 212 |
+
label=rel.get("type", "LINK"),
|
| 213 |
+
color="#30363d",
|
| 214 |
+
arrows="to"
|
| 215 |
+
)
|
| 216 |
+
|
| 217 |
+
net.set_options('{"physics": {"forceAtlas2Based": {"gravitationalConstant": -100, "centralGravity": 0.01}, "solver": "forceAtlas2Based"}}')
|
| 218 |
+
|
| 219 |
+
# Affichage de la légende
|
| 220 |
+
st.write("**Légende détectée :**")
|
| 221 |
+
leg_cols = st.columns(len(found_types) if len(found_types) > 0 else 1)
|
| 222 |
+
for idx, (t_name, t_color) in enumerate(found_types.items()):
|
| 223 |
+
leg_cols[idx % len(leg_cols)].markdown(f"<span style='color:{t_color}'>●</span> {t_name}", unsafe_allow_html=True)
|
| 224 |
+
|
| 225 |
+
# Rendu du graphe
|
| 226 |
+
path = "temp_graph_viz.html"
|
| 227 |
+
net.save_graph(path)
|
| 228 |
+
with open(path, 'r', encoding='utf-8') as f:
|
| 229 |
+
st.components.v1.html(f.read(), height=750)
|
| 230 |
+
|
| 231 |
+
except Exception as e:
|
| 232 |
+
st.error(f"Erreur de rendu visuel : {e}")
|
| 233 |
+
else:
|
| 234 |
+
st.warning("⚠️ Aucune donnée disponible. Veuillez d'abord générer l'intelligence dans l'onglet 'ENTITÉS & RELATIONS'.")
|
| 235 |
|
| 236 |
|