Spaces:
Sleeping
Sleeping
Passage par GitHub Gist pour uploader jusqu'à 10Mo dans Qdrant
Browse files
- app.py +103 -187
- requirements.txt +1 -5
app.py
CHANGED
|
@@ -2,7 +2,6 @@ import streamlit as st
|
|
| 2 |
import hashlib
|
| 3 |
import requests
|
| 4 |
import os
|
| 5 |
-
import json
|
| 6 |
import pytz
|
| 7 |
import time
|
| 8 |
import unicodedata
|
|
@@ -10,6 +9,7 @@ import importlib.metadata
|
|
| 10 |
import re
|
| 11 |
import tempfile
|
| 12 |
import uuid
|
|
|
|
| 13 |
from pydantic import BaseModel, validator
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
from sentence_transformers import SentenceTransformer
|
|
@@ -20,11 +20,6 @@ from datetime import datetime, timedelta
|
|
| 20 |
from collections import defaultdict
|
| 21 |
from PyPDF2 import PdfReader
|
| 22 |
from docx import Document
|
| 23 |
-
from google.oauth2 import service_account
|
| 24 |
-
from googleapiclient.discovery import build
|
| 25 |
-
from googleapiclient.http import MediaFileUpload
|
| 26 |
-
# from unstructured.partition.pdf import partition_pdf # Optionnel, pour une extraction plus fine
|
| 27 |
-
# from unstructured.partition.docx import partition_docx
|
| 28 |
|
| 29 |
try:
|
| 30 |
import pdfplumber
|
|
@@ -349,76 +344,6 @@ class ResponseDocument(BaseModel):
|
|
| 349 |
|
| 350 |
# --- 1a. Fonctions d'upload, d'indexation et d'embedding
|
| 351 |
|
| 352 |
-
# Fonction pour uploader un fichier dans le Drive
|
| 353 |
-
def upload_to_drive(uploaded_file):
    """Upload a file to Google Drive and keep a local temporary copy of it.

    The uploaded bytes are first written to a named temporary file (created
    with ``delete=False`` so it outlives the ``with`` block), then sent to the
    Drive folder identified by ``FOLDER_ID``.

    Args:
        uploaded_file: Object exposing ``name`` and ``getvalue()``
            (presumably a Streamlit ``UploadedFile`` - confirm against callers).

    Returns:
        A ``(file_id, tmp_path)`` tuple: the ``"id"`` field of the Drive
        response and the path of the temporary copy. On success the caller
        owns the temporary file and must delete it.

    Raises:
        Exception: Whatever the temp-file write or the Drive call raises.
            The temporary file is removed before the error propagates,
            because the caller never receives its path in that case.
    """
    extension = uploaded_file.name.split('.')[-1]
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{extension}") as tmp_file:
            # Record the path before writing so a failed write is still cleaned up.
            tmp_path = tmp_file.name
            tmp_file.write(uploaded_file.getvalue())

        file_metadata = {"name": uploaded_file.name, "parents": [FOLDER_ID]}
        media = MediaFileUpload(tmp_path, resumable=True)
        created = drive_service.files().create(
            body=file_metadata,
            media_body=media,
            fields="id",
        ).execute()
        return created.get("id"), tmp_path
    except Exception:
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: never mask the original error.
                pass
        raise
|
| 363 |
-
|
| 364 |
-
# Fonction qui permet de reprendre un upload interrompu
|
| 365 |
-
def upload_to_drive_resumable(uploaded_file):
    """Upload a file to Google Drive chunk by chunk, with debug logging.

    Steps: write the uploaded bytes to a named temporary file, build the Drive
    ``files().create`` request with a resumable media body, then call
    ``next_chunk()`` until a response is returned. Every step is reported both
    in the Streamlit page and on stdout.

    Args:
        uploaded_file: Object exposing ``name`` and ``getvalue()``
            (presumably a Streamlit ``UploadedFile`` - confirm against callers).

    Returns:
        A ``(file_id, tmp_path)`` tuple: the ``"id"`` field of the Drive
        response and the path of the temporary copy. On success the caller
        owns the temporary file and must delete it.

    Raises:
        Exception: Any error from the temp-file creation, the request setup or
            the chunked upload is reported and re-raised. The temporary file is
            removed first, since the caller never receives its path on failure.
    """
    st.write("🔧 Début de upload_to_drive_resumable")
    print("DEBUG: Entrée dans upload_to_drive_resumable")

    tmp_path = None
    try:
        # Temporary local copy of the file
        try:
            extension = uploaded_file.name.split('.')[-1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{extension}") as tmp_file:
                # Record the path before writing so a failed write is still cleaned up.
                tmp_path = tmp_file.name
                data = uploaded_file.getvalue()
                st.write(f"📦 Taille du fichier lu : {len(data)} octets")
                print("DEBUG: Taille du fichier lu:", len(data))
                tmp_file.write(data)
            st.write(f"📄 Fichier temporaire créé : {tmp_path}")
            print("DEBUG: Fichier temporaire créé:", tmp_path)
        except Exception as e:
            st.error(f"❌ Erreur lors de la création du fichier temporaire : {e}")
            print("DEBUG: Erreur création fichier temporaire:", e)
            raise

        # Metadata for the Drive file
        file_metadata = {"name": uploaded_file.name, "parents": [FOLDER_ID]}
        st.write(f"📝 Métadonnées du fichier : {file_metadata}")
        print("DEBUG: Métadonnées du fichier:", file_metadata)

        try:
            media = MediaFileUpload(tmp_path, resumable=True)
            request = drive_service.files().create(body=file_metadata, media_body=media, fields="id")
            st.write("🚀 Requête Drive initialisée")
            print("DEBUG: Requête Drive initialisée")
        except Exception as e:
            st.error(f"❌ Erreur initialisation requête Drive : {e}")
            print("DEBUG: Erreur initialisation requête Drive:", e)
            raise

        # Chunked upload: next_chunk() yields a response once the upload is done
        response = None
        try:
            while response is None:
                status, response = request.next_chunk()
                if status:
                    percent = int(status.progress() * 100)
                    st.write(f"⬆️ Upload vers Drive : {percent}%")
                    print("DEBUG: Progression upload:", percent, "%")
            st.write("✅ Upload terminé")
            print("DEBUG: Upload terminé, réponse:", response)
        except Exception as e:
            st.error(f"❌ Erreur pendant l'upload vers Drive : {e}")
            print("DEBUG: Erreur pendant l'upload:", e)
            raise

        file_id = response.get("id")
        st.write(f"📂 ID du fichier Drive : {file_id}")
        print("DEBUG: ID du fichier Drive:", file_id)

        return file_id, tmp_path
    except Exception:
        # The caller cannot clean up a path it never received.
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: never mask the original error.
                pass
        raise
|
| 421 |
-
|
| 422 |
# Fonction pour extraire le texte d'un document pdf
|
| 423 |
def extract_text(pdf_path: str, max_pages: Optional[int] = None) -> str:
|
| 424 |
"""Extrait le texte d'un PDF, avec gestion des erreurs et des pages vides (VOTRE FONCTION)."""
|
|
@@ -438,10 +363,6 @@ def extract_text(pdf_path: str, max_pages: Optional[int] = None) -> str:
|
|
| 438 |
# Fonction pour extraire le texte d'un document Word
|
| 439 |
def extract_text_from_docx(file_path: str, use_unstructured: bool = False) -> str:
    """Extract the text of a Word (.docx) file.

    Args:
        file_path: Path of the document to open.
        use_unstructured: Accepted for compatibility; this implementation
            does not read it.

    Returns:
        The non-blank paragraphs joined with newlines, with leading and
        trailing whitespace removed from the whole result.
    """
    document = Document(file_path)
    kept_paragraphs = []
    for paragraph in document.paragraphs:
        content = paragraph.text
        # Skip paragraphs that hold only whitespace.
        if content.strip():
            kept_paragraphs.append(content)
    joined = "\n".join(kept_paragraphs)
    return joined.strip()
|
|
@@ -2730,120 +2651,115 @@ else:
|
|
| 2730 |
# 2. Section d'upload de documents
|
| 2731 |
st.markdown("---")
|
| 2732 |
st.markdown("**Ajouter un document**")
|
| 2733 |
-
|
| 2734 |
uploaded_file = st.file_uploader(
|
| 2735 |
-
"Sélectionnez un PDF ou Word",
|
| 2736 |
type=["pdf", "docx"],
|
| 2737 |
key="doc_uploader"
|
| 2738 |
)
|
| 2739 |
|
| 2740 |
if uploaded_file:
|
| 2741 |
-
|
| 2742 |
-
|
| 2743 |
-
"
|
| 2744 |
-
|
| 2745 |
-
|
| 2746 |
-
|
| 2747 |
-
|
| 2748 |
-
|
| 2749 |
-
|
| 2750 |
-
|
| 2751 |
-
|
| 2752 |
-
|
| 2753 |
-
|
| 2754 |
-
|
| 2755 |
-
|
| 2756 |
-
|
| 2757 |
-
st.
|
| 2758 |
-
|
| 2759 |
-
|
| 2760 |
-
except Exception as e:
|
| 2761 |
-
st.error(f"❌ Erreur pendant l'upload vers Drive : {e}")
|
| 2762 |
-
print("DEBUG: Erreur upload_to_drive_resumable:", e)
|
| 2763 |
-
|
| 2764 |
-
try:
|
| 2765 |
-
results = drive_service.files().list(
|
| 2766 |
-
q=f"'{FOLDER_ID}' in parents",
|
| 2767 |
-
fields="files(id, name)"
|
| 2768 |
-
).execute()
|
| 2769 |
-
st.write("📂 Contenu du dossier uploads :", results.get("files", []))
|
| 2770 |
-
print("DEBUG: Contenu du dossier uploads:", results.get("files", []))
|
| 2771 |
-
except Exception as e:
|
| 2772 |
-
st.error(f"❌ Erreur pendant la liste des fichiers : {e}")
|
| 2773 |
-
print("DEBUG: Erreur drive_service.files().list:", e)
|
| 2774 |
-
|
| 2775 |
-
if not custom_name.strip():
|
| 2776 |
-
st.warning("Veuillez entrer un nom valide.")
|
| 2777 |
-
print("DEBUG: Nom du document invalide")
|
| 2778 |
-
else:
|
| 2779 |
-
display_name = custom_name.strip().replace(" ", "_")
|
| 2780 |
-
collection_name = f"{display_name}__{int(datetime.now().timestamp())}"
|
| 2781 |
-
st.write("🆕 Nom de collection généré :", collection_name)
|
| 2782 |
-
print("DEBUG: Nom de collection généré:", collection_name)
|
| 2783 |
-
|
| 2784 |
-
progress_bar = st.progress(0)
|
| 2785 |
-
status_text = st.empty()
|
| 2786 |
-
|
| 2787 |
-
def update_progress(current, total, message):
|
| 2788 |
-
percent = int((current / total) * 100) if total > 0 else 0
|
| 2789 |
-
progress_bar.progress(percent)
|
| 2790 |
-
status_text.text(f"{message} ({current}/{total})")
|
| 2791 |
-
print(f"DEBUG: Progress {percent}% - {message}")
|
| 2792 |
-
|
| 2793 |
-
try:
|
| 2794 |
-
status_text.text("Création de la collection dans Qdrant...")
|
| 2795 |
-
st.write("⚙️ Création collection Qdrant :", collection_name)
|
| 2796 |
-
print("DEBUG: Création collection Qdrant:", collection_name)
|
| 2797 |
-
|
| 2798 |
-
qdrant_client.create_collection(
|
| 2799 |
-
collection_name=collection_name,
|
| 2800 |
-
vectors_config=models.VectorParams(size=1024, distance=models.Distance.COSINE)
|
| 2801 |
-
)
|
| 2802 |
-
|
| 2803 |
-
status_text.text("Upload vers Google Drive...")
|
| 2804 |
-
st.write("⚙️ Upload vers Drive (resumable)...")
|
| 2805 |
-
print("DEBUG: Upload vers Drive (resumable)...")
|
| 2806 |
-
file_id, tmp_path = upload_to_drive_resumable(uploaded_file)
|
| 2807 |
-
st.write("📄 Fichier temporaire pour traitement :", tmp_path)
|
| 2808 |
-
print("DEBUG: Fichier temporaire pour traitement:", tmp_path)
|
| 2809 |
-
|
| 2810 |
-
status_text.text("Traitement et indexation en cours...")
|
| 2811 |
-
st.write("⚙️ Lancement process_and_index_document...")
|
| 2812 |
-
print("DEBUG: Lancement process_and_index_document...")
|
| 2813 |
-
success = process_and_index_document(
|
| 2814 |
-
file_path=tmp_path,
|
| 2815 |
-
file_type=uploaded_file.name.split('.')[-1],
|
| 2816 |
-
collection_name=collection_name,
|
| 2817 |
-
qdrant_client=qdrant_client,
|
| 2818 |
-
embedding_model=embedding_model,
|
| 2819 |
-
progress_callback=update_progress
|
| 2820 |
-
)
|
| 2821 |
-
|
| 2822 |
-
if success:
|
| 2823 |
-
status_text.text("Document ajouté avec succès !")
|
| 2824 |
-
progress_bar.progress(100)
|
| 2825 |
-
st.success(f"✅ Document ajouté sous le nom '{custom_name}' !")
|
| 2826 |
-
print("DEBUG: Document ajouté avec succès")
|
| 2827 |
-
|
| 2828 |
-
if os.path.exists(tmp_path):
|
| 2829 |
-
os.unlink(tmp_path)
|
| 2830 |
-
print("DEBUG: Fichier temporaire supprimé:", tmp_path)
|
| 2831 |
-
|
| 2832 |
-
st.session_state.manage_doc_base = False
|
| 2833 |
-
st.rerun()
|
| 2834 |
else:
|
| 2835 |
-
|
| 2836 |
-
|
| 2837 |
-
|
| 2838 |
-
|
| 2839 |
-
|
| 2840 |
-
|
| 2841 |
-
|
| 2842 |
-
|
| 2843 |
-
|
| 2844 |
-
|
| 2845 |
-
|
| 2846 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2847 |
|
| 2848 |
|
| 2849 |
#########################################################################################
|
|
|
|
| 2 |
import hashlib
|
| 3 |
import requests
|
| 4 |
import os
|
|
|
|
| 5 |
import pytz
|
| 6 |
import time
|
| 7 |
import unicodedata
|
|
|
|
| 9 |
import re
|
| 10 |
import tempfile
|
| 11 |
import uuid
|
| 12 |
+
import base64
|
| 13 |
from pydantic import BaseModel, validator
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
from sentence_transformers import SentenceTransformer
|
|
|
|
| 20 |
from collections import defaultdict
|
| 21 |
from PyPDF2 import PdfReader
|
| 22 |
from docx import Document
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
try:
|
| 25 |
import pdfplumber
|
|
|
|
| 344 |
|
| 345 |
# --- 1a. Fonctions d'upload, d'indexation et d'embedding
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
# Fonction pour extraire le texte d'un document pdf
|
| 348 |
def extract_text(pdf_path: str, max_pages: Optional[int] = None) -> str:
|
| 349 |
"""Extrait le texte d'un PDF, avec gestion des erreurs et des pages vides (VOTRE FONCTION)."""
|
|
|
|
| 363 |
# Fonction pour extraire le texte d'un document Word
|
| 364 |
def extract_text_from_docx(file_path: str, use_unstructured: bool = False) -> str:
    """Return the text content of a Word (.docx) document.

    Args:
        file_path: Location of the .docx file to read.
        use_unstructured: Kept in the signature for compatibility; it is not
            used by this implementation.

    Returns:
        Every paragraph that contains non-whitespace text, one per line,
        stripped of surrounding whitespace as a whole.
    """
    word_doc = Document(file_path)
    lines = []
    for para in word_doc.paragraphs:
        para_text = para.text
        if not para_text.strip():
            # Blank paragraph: contributes nothing to the output.
            continue
        lines.append(para_text)
    return "\n".join(lines).strip()
|
|
|
|
| 2651 |
# 2. Document upload section
#
# Workflow: size check -> name prompt -> push the file to a non-public GitHub
# Gist as base64 text -> fetch it back from its raw URL -> decode it into a
# local temporary file -> create the Qdrant collection -> index the document.
MAX_UPLOAD_BYTES = 10 * 1024 * 1024  # 10 MB, the limit announced in the uploader label
GITHUB_TIMEOUT_SECONDS = 60  # never let a stalled GitHub call block the script forever

st.markdown("---")
st.markdown("**Ajouter un document**")
uploaded_file = st.file_uploader(
    "Sélectionnez un PDF ou Word (max 10 Mo)",
    type=["pdf", "docx"],
    key="doc_uploader"
)

if uploaded_file:
    # Reject oversized files before doing any work.
    if uploaded_file.size > MAX_UPLOAD_BYTES:
        st.error("❌ Le fichier dépasse la limite de 10 Mo autorisée.")
    else:
        # Custom display name, pre-filled with the file name without extension.
        default_name = os.path.splitext(uploaded_file.name)[0]
        custom_name = st.text_input(
            "Nom du document :",
            value=default_name,
            key="doc_name_input"
        )
        if st.button("Ajouter le document", key="add_document"):
            if not custom_name.strip():
                st.warning("Veuillez entrer un nom valide.")
            else:
                # The GitHub personal access token is read from the app secrets.
                GITHUB_TOKEN = st.secrets.get("GITHUB_TOKEN", "")
                if not GITHUB_TOKEN:
                    st.error("❌ Token GitHub manquant. Veuillez configurer `GITHUB_TOKEN` dans les secrets de votre Space.")
                else:
                    # Collection name: display name plus a Unix timestamp suffix.
                    display_name = custom_name.strip().replace(" ", "_")
                    collection_name = f"{display_name}__{int(datetime.now().timestamp())}"
                    file_extension = uploaded_file.name.split('.')[-1]
                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    def update_progress(current, total, message):
                        """Report indexing progress in the progress bar and status line."""
                        percent = int((current / total) * 100) if total > 0 else 0
                        progress_bar.progress(percent)
                        status_text.text(f"{message} ({current}/{total})")

                    # Defined before the try so the cleanup below can always test it.
                    tmp_path = None
                    try:
                        # 1. Upload to GitHub Gist (binary payload sent as base64 text)
                        status_text.text("Upload du fichier vers GitHub Gist...")
                        headers = {"Authorization": f"token {GITHUB_TOKEN}"}
                        file_content = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
                        data = {
                            "description": f"Document pour RAG: {custom_name}",
                            "public": False,
                            "files": {
                                uploaded_file.name: {"content": file_content}
                            }
                        }
                        response = requests.post(
                            "https://api.github.com/gists",
                            json=data,
                            headers=headers,
                            timeout=GITHUB_TIMEOUT_SECONDS,
                        )
                        response.raise_for_status()  # Surface HTTP errors
                        gist_data = response.json()
                        gist_url = gist_data["html_url"]
                        raw_url = gist_data["files"][uploaded_file.name]["raw_url"]

                        # 2. Download the file back from GitHub Gist
                        status_text.text("Téléchargement du fichier depuis GitHub Gist...")
                        file_response = requests.get(raw_url, timeout=GITHUB_TIMEOUT_SECONDS)
                        file_response.raise_for_status()

                        # 3. Local temporary copy.
                        # The gist holds the base64 text produced in step 1, so it must
                        # be decoded back to the original bytes; writing it verbatim
                        # would hand a base64 text file to the PDF/DOCX parser.
                        original_bytes = base64.b64decode(file_response.content)
                        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_extension}") as tmp_file:
                            tmp_path = tmp_file.name
                            tmp_file.write(original_bytes)

                        # 4. Create the Qdrant collection
                        status_text.text("Création de la collection dans Qdrant...")
                        qdrant_client.create_collection(
                            collection_name=collection_name,
                            vectors_config=models.VectorParams(
                                size=1024,
                                distance=models.Distance.COSINE
                            )
                        )

                        # 5. Processing and indexing
                        # NOTE(review): if indexing fails, the collection created in
                        # step 4 is left in place - confirm whether it should be removed.
                        status_text.text("Traitement et indexation en cours...")
                        success = process_and_index_document(
                            file_path=tmp_path,
                            file_type=file_extension,
                            collection_name=collection_name,
                            qdrant_client=qdrant_client,
                            embedding_model=embedding_model,
                            progress_callback=update_progress
                        )

                        if success:
                            status_text.text("Document ajouté avec succès.")
                            progress_bar.progress(100)
                            st.success(f"✅ Document ajouté sous le nom '{custom_name}'.")
                        else:
                            status_text.text("Échec de l'ajout du document")
                            st.error("❌ Échec de l'ajout.")

                    except requests.exceptions.RequestException as e:
                        status_text.text(f"Erreur GitHub: {str(e)}")
                        st.error(f"Erreur GitHub: {e}")
                    except Exception as e:
                        status_text.text(f"Erreur: {str(e)}")
                        st.error(f"Erreur: {e}")
                    finally:
                        # Cleanup: remove the temporary copy, leave the final message
                        # visible for a moment, then clear the widgets and rerun.
                        if tmp_path is not None and os.path.exists(tmp_path):
                            os.unlink(tmp_path)
                        time.sleep(2)
                        progress_bar.empty()
                        status_text.empty()
                        st.rerun()
|
| 2763 |
|
| 2764 |
|
| 2765 |
#########################################################################################
|
requirements.txt
CHANGED
|
@@ -12,8 +12,4 @@ pdfplumber
|
|
| 12 |
matplotlib
|
| 13 |
# onnxruntime==1.16.0
|
| 14 |
# unstructured[pdf,docx]==0.10.30
|
| 15 |
-
PyPDF2==3.0.1
|
| 16 |
-
google-api-python-client
|
| 17 |
-
google-auth
|
| 18 |
-
google-auth-oauthlib
|
| 19 |
-
google-auth-httplib2
|
|
|
|
| 12 |
matplotlib
|
| 13 |
# onnxruntime==1.16.0
|
| 14 |
# unstructured[pdf,docx]==0.10.30
|
| 15 |
+
PyPDF2==3.0.1
|
|
|
|
|
|
|
|
|
|
|
|