Spaces:
Sleeping
Sleeping
Commit ·
bccfcc4
1
Parent(s): 4ec0d98
persist finish job
Browse files- pages/1_🎧_Transcriptions.py +61 -55
pages/1_🎧_Transcriptions.py
CHANGED
|
@@ -15,42 +15,51 @@ ANNOTATIONS_PREFIX = "annotations"
|
|
| 15 |
|
| 16 |
import s3fs
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
fs = s3fs.S3FileSystem(
|
| 21 |
key=AWS_ACCESS_KEY_ID,
|
| 22 |
secret=AWS_SECRET_ACCESS_KEY,
|
| 23 |
-
client_kwargs=
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
if not all([S3_BUCKET, S3_PREFIX, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, ENDPOINT_URL]):
|
| 28 |
st.error("Veuillez configurer correctement les variables d'environnement S3.")
|
| 29 |
st.stop()
|
| 30 |
|
| 31 |
-
# Fonction pour vérifier les titres complètement traités
|
| 32 |
def get_completed_titles():
|
| 33 |
"""Renvoie la liste des titres qui n'ont plus d'audios à traiter."""
|
| 34 |
-
status_file = "title_completion_status.json"
|
| 35 |
|
| 36 |
-
|
| 37 |
-
with open(status_file, 'r') as f:
|
| 38 |
status = json.load(f)
|
| 39 |
return [title for title, is_completed in status.items() if is_completed]
|
| 40 |
-
|
| 41 |
return []
|
| 42 |
|
| 43 |
def save_title_completion_status(title, is_completed):
|
| 44 |
"""Sauvegarde l'état de traitement d'un titre dans un fichier JSON."""
|
| 45 |
-
status_file = "title_completion_status.json"
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
status[title] = is_completed
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
st.set_page_config(page_title="Travaux Audio", layout="wide")
|
| 56 |
st.title("🗣️ Travaux Audio - Transcription & Traduction")
|
|
@@ -93,16 +102,17 @@ if st.button("👋 Changer d'utilisateur"):
|
|
| 93 |
st.session_state.current_username = ""
|
| 94 |
st.rerun()
|
| 95 |
|
| 96 |
-
|
| 97 |
-
audio_titles = list_audio_files_by_title()
|
|
|
|
|
|
|
| 98 |
if not audio_titles:
|
| 99 |
st.warning("Aucun audio disponible pour l'instant.")
|
| 100 |
st.stop()
|
| 101 |
|
| 102 |
-
# Obtenir les titres globalement terminés
|
| 103 |
globally_completed_titles = get_completed_titles()
|
|
|
|
| 104 |
|
| 105 |
-
# Filtrer les titres pour exclure ceux qui sont déjà terminés
|
| 106 |
available_titles = [title for title in audio_titles.keys()
|
| 107 |
if title not in st.session_state.completed_titles
|
| 108 |
and title not in globally_completed_titles]
|
|
@@ -125,34 +135,33 @@ selected_title = st.selectbox(
|
|
| 125 |
st.session_state["selected_title"] = selected_title
|
| 126 |
audio_paths = audio_titles[selected_title]
|
| 127 |
|
| 128 |
-
# Récupérer les fichiers déjà traités pour ce titre et cet utilisateur
|
| 129 |
processed_files = get_processed_audio_files_by_user_and_title(username, selected_title)
|
|
|
|
| 130 |
|
| 131 |
-
# Filtrer la liste des audios pour ne garder que ceux non traités
|
| 132 |
unprocessed_audio_paths = [path for path in audio_paths if os.path.basename(path) not in processed_files]
|
|
|
|
| 133 |
|
| 134 |
if not unprocessed_audio_paths:
|
| 135 |
st.success(f"🎉 Vous avez déjà terminé tous les audios du groupe '{selected_title}'!")
|
| 136 |
st.session_state.completed_titles.add(selected_title)
|
| 137 |
|
| 138 |
-
# Vérifier si ce titre est complètement traité par tous les utilisateurs
|
| 139 |
-
# Cela nécessite une fonction qui vérifie si tous les audios de ce titre ont des annotations
|
| 140 |
all_files_processed = True
|
| 141 |
for audio_path in audio_paths:
|
| 142 |
audio_filename = os.path.basename(audio_path)
|
| 143 |
-
annotation_path = f"{ANNOTATIONS_PREFIX}/{selected_title}/{audio_filename}.json"
|
| 144 |
-
if not
|
| 145 |
all_files_processed = False
|
|
|
|
| 146 |
break
|
| 147 |
|
| 148 |
if all_files_processed:
|
|
|
|
| 149 |
save_title_completion_status(selected_title, True)
|
| 150 |
|
| 151 |
if st.button("Continuer avec un autre groupe (Terminé)"):
|
| 152 |
st.rerun()
|
| 153 |
st.stop()
|
| 154 |
|
| 155 |
-
# Initialiser l'index de l'audio pour le titre sélectionné (ou reprendre la progression)
|
| 156 |
index_key = f"index_{selected_title}"
|
| 157 |
if index_key not in st.session_state:
|
| 158 |
st.session_state[index_key] = 0
|
|
@@ -163,8 +172,11 @@ current_index = st.session_state[index_key]
|
|
| 163 |
|
| 164 |
if unprocessed_audio_paths:
|
| 165 |
current_audio = unprocessed_audio_paths[current_index]
|
| 166 |
-
st.subheader(f"🎧 Audio {current_index + 1} sur {len(unprocessed_audio_paths)} : {
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
with st.form(f"form_{current_audio}"):
|
| 170 |
transcription = st.text_area("Transcription en mooré", key=f"tr_{current_audio}")
|
|
@@ -172,38 +184,32 @@ if unprocessed_audio_paths:
|
|
| 172 |
submitted = st.form_submit_button("💾 Soumettre")
|
| 173 |
|
| 174 |
if submitted:
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
# Vérifier si tous les audios non traités de ce groupe sont maintenant terminés
|
| 185 |
-
if st.session_state[index_key] >= len(unprocessed_audio_paths):
|
| 186 |
-
st.success(f"🎉 Vous avez terminé tous les audios du groupe '{selected_title}'!")
|
| 187 |
-
st.session_state.completed_titles.add(selected_title)
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
all_files_processed = False
|
| 196 |
-
break
|
| 197 |
-
|
| 198 |
-
if all_files_processed:
|
| 199 |
save_title_completion_status(selected_title, True)
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
|
|
|
|
|
|
| 203 |
if st.session_state[index_key] >= len(unprocessed_audio_paths) and st.button("Continuer avec un autre groupe"):
|
| 204 |
st.rerun()
|
| 205 |
|
| 206 |
else:
|
| 207 |
st.info(f"Il ne reste plus d'audios à traiter pour le groupe '{selected_title}'.")
|
| 208 |
if st.button("Choisir un autre groupe"):
|
| 209 |
-
st.rerun()
|
|
|
|
| 15 |
|
| 16 |
import s3fs
|
| 17 |
|
|
|
|
|
|
|
| 18 |
fs = s3fs.S3FileSystem(
|
| 19 |
key=AWS_ACCESS_KEY_ID,
|
| 20 |
secret=AWS_SECRET_ACCESS_KEY,
|
| 21 |
+
client_kwargs={"endpoint_url": ENDPOINT_URL}
|
| 22 |
+
)
|
|
|
|
| 23 |
|
| 24 |
if not all([S3_BUCKET, S3_PREFIX, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, ENDPOINT_URL]):
|
| 25 |
st.error("Veuillez configurer correctement les variables d'environnement S3.")
|
| 26 |
st.stop()
|
| 27 |
|
|
|
|
| 28 |
def get_completed_titles():
|
| 29 |
"""Renvoie la liste des titres qui n'ont plus d'audios à traiter."""
|
| 30 |
+
status_file = f"{S3_BUCKET}/title_completion_status.json"
|
| 31 |
|
| 32 |
+
try:
|
| 33 |
+
with fs.open(status_file, 'r') as f:
|
| 34 |
status = json.load(f)
|
| 35 |
return [title for title, is_completed in status.items() if is_completed]
|
| 36 |
+
except (FileNotFoundError, json.JSONDecodeError):
|
| 37 |
return []
|
| 38 |
|
| 39 |
def save_title_completion_status(title, is_completed):
|
| 40 |
"""Sauvegarde l'état de traitement d'un titre dans un fichier JSON."""
|
| 41 |
+
status_file = f"{S3_BUCKET}/title_completion_status.json"
|
| 42 |
|
| 43 |
+
try:
|
| 44 |
+
if fs.exists(status_file):
|
| 45 |
+
with fs.open(status_file, 'r') as f:
|
| 46 |
+
status = json.load(f)
|
| 47 |
+
else:
|
| 48 |
+
status = {}
|
| 49 |
+
except Exception as e:
|
| 50 |
+
st.warning(f"Erreur lors de la lecture du statut: {e}")
|
| 51 |
+
status = {}
|
| 52 |
|
| 53 |
status[title] = is_completed
|
| 54 |
+
print(f"Mise à jour du statut pour {title}: {is_completed}")
|
| 55 |
+
print(f"Statut complet: {status}")
|
| 56 |
|
| 57 |
+
try:
|
| 58 |
+
with fs.open(status_file, 'w') as f:
|
| 59 |
+
json.dump(status, f)
|
| 60 |
+
print(f"Statut sauvegardé avec succès dans {status_file}")
|
| 61 |
+
except Exception as e:
|
| 62 |
+
st.error(f"Erreur lors de la sauvegarde du statut: {e}")
|
| 63 |
|
| 64 |
st.set_page_config(page_title="Travaux Audio", layout="wide")
|
| 65 |
st.title("🗣️ Travaux Audio - Transcription & Traduction")
|
|
|
|
| 102 |
st.session_state.current_username = ""
|
| 103 |
st.rerun()
|
| 104 |
|
| 105 |
+
if "audio_titles" not in st.session_state:
|
| 106 |
+
st.session_state.audio_titles = list_audio_files_by_title()
|
| 107 |
+
|
| 108 |
+
audio_titles = st.session_state.audio_titles
|
| 109 |
if not audio_titles:
|
| 110 |
st.warning("Aucun audio disponible pour l'instant.")
|
| 111 |
st.stop()
|
| 112 |
|
|
|
|
| 113 |
globally_completed_titles = get_completed_titles()
|
| 114 |
+
print(f"Titres globalement terminés: {globally_completed_titles}")
|
| 115 |
|
|
|
|
| 116 |
available_titles = [title for title in audio_titles.keys()
|
| 117 |
if title not in st.session_state.completed_titles
|
| 118 |
and title not in globally_completed_titles]
|
|
|
|
| 135 |
st.session_state["selected_title"] = selected_title
|
| 136 |
audio_paths = audio_titles[selected_title]
|
| 137 |
|
|
|
|
| 138 |
processed_files = get_processed_audio_files_by_user_and_title(username, selected_title)
|
| 139 |
+
print(f"Fichiers déjà traités pour {username} et {selected_title}: {processed_files}")
|
| 140 |
|
|
|
|
| 141 |
unprocessed_audio_paths = [path for path in audio_paths if os.path.basename(path) not in processed_files]
|
| 142 |
+
print(f"Fichiers non traités: {len(unprocessed_audio_paths)} sur {len(audio_paths)}")
|
| 143 |
|
| 144 |
if not unprocessed_audio_paths:
|
| 145 |
st.success(f"🎉 Vous avez déjà terminé tous les audios du groupe '{selected_title}'!")
|
| 146 |
st.session_state.completed_titles.add(selected_title)
|
| 147 |
|
|
|
|
|
|
|
| 148 |
all_files_processed = True
|
| 149 |
for audio_path in audio_paths:
|
| 150 |
audio_filename = os.path.basename(audio_path)
|
| 151 |
+
annotation_path = f"{S3_BUCKET}/{ANNOTATIONS_PREFIX}/{selected_title}/{audio_filename}.json"
|
| 152 |
+
if not fs.exists(annotation_path):
|
| 153 |
all_files_processed = False
|
| 154 |
+
print(f"Fichier non annoté: {annotation_path}")
|
| 155 |
break
|
| 156 |
|
| 157 |
if all_files_processed:
|
| 158 |
+
print(f"Tous les fichiers du titre {selected_title} sont annotés")
|
| 159 |
save_title_completion_status(selected_title, True)
|
| 160 |
|
| 161 |
if st.button("Continuer avec un autre groupe (Terminé)"):
|
| 162 |
st.rerun()
|
| 163 |
st.stop()
|
| 164 |
|
|
|
|
| 165 |
index_key = f"index_{selected_title}"
|
| 166 |
if index_key not in st.session_state:
|
| 167 |
st.session_state[index_key] = 0
|
|
|
|
| 172 |
|
| 173 |
if unprocessed_audio_paths:
|
| 174 |
current_audio = unprocessed_audio_paths[current_index]
|
| 175 |
+
st.subheader(f"🎧 Audio {current_index + 1} sur {len(unprocessed_audio_paths)} : {os.path.basename(current_audio)}")
|
| 176 |
+
|
| 177 |
+
audio_url = get_audio_url(current_audio)
|
| 178 |
+
|
| 179 |
+
st.audio(audio_url)
|
| 180 |
|
| 181 |
with st.form(f"form_{current_audio}"):
|
| 182 |
transcription = st.text_area("Transcription en mooré", key=f"tr_{current_audio}")
|
|
|
|
| 184 |
submitted = st.form_submit_button("💾 Soumettre")
|
| 185 |
|
| 186 |
if submitted:
|
| 187 |
+
try:
|
| 188 |
+
save_result = save_annotation(
|
| 189 |
+
audio_path=current_audio,
|
| 190 |
+
user=username,
|
| 191 |
+
transcription=transcription,
|
| 192 |
+
traduction=traduction,
|
| 193 |
+
)
|
| 194 |
+
st.success("✅ Contribution enregistrée avec succès !")
|
| 195 |
+
print(f"Résultat de sauvegarde: {save_result}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
+
st.session_state[index_key] += 1
|
| 198 |
+
|
| 199 |
+
if st.session_state[index_key] >= len(unprocessed_audio_paths):
|
| 200 |
+
st.success(f"🎉 Vous avez terminé tous les audios du groupe '{selected_title}'!")
|
| 201 |
+
st.session_state.completed_titles.add(selected_title)
|
| 202 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
save_title_completion_status(selected_title, True)
|
| 204 |
+
else:
|
| 205 |
+
st.rerun()
|
| 206 |
+
except Exception as e:
|
| 207 |
+
st.error(f"Erreur lors de l'enregistrement: {e}")
|
| 208 |
+
|
| 209 |
if st.session_state[index_key] >= len(unprocessed_audio_paths) and st.button("Continuer avec un autre groupe"):
|
| 210 |
st.rerun()
|
| 211 |
|
| 212 |
else:
|
| 213 |
st.info(f"Il ne reste plus d'audios à traiter pour le groupe '{selected_title}'.")
|
| 214 |
if st.button("Choisir un autre groupe"):
|
| 215 |
+
st.rerun()
|