6.57 kB

	import os
	import tempfile
	import subprocess
	from pathlib import Path

	import whisper
	from deep_translator import GoogleTranslator
	try:
	from moviepy.editor import VideoFileClip
	_HAS_MOVIEPY = True
	except Exception:
	VideoFileClip = None
	_HAS_MOVIEPY = False
	import gradio as gr


	def extract_audio(video_path: str, out_audio_path: str) -> None:
	    """Extract the audio track of `video_path` into `out_audio_path`.

	    Uses `moviepy` when it could be imported; otherwise falls back to the
	    `ffmpeg` CLI (mono, 16 kHz, 16-bit PCM), so the app keeps working when
	    `moviepy` is not installed.

	    Args:
	        video_path: path of the input video file.
	        out_audio_path: path of the audio file to write.

	    Raises:
	        RuntimeError: if the video has no audio track (moviepy path), or if
	            the `ffmpeg` fallback cannot be started or exits with an error.
	    """
	    if _HAS_MOVIEPY and VideoFileClip is not None:
	        clip = VideoFileClip(video_path)
	        try:
	            # `clip.audio` is None when moviepy finds no audio track; fail
	            # with a clear message instead of an AttributeError.
	            if clip.audio is None:
	                raise RuntimeError("La vidéo ne contient aucune piste audio.")
	            clip.audio.write_audiofile(out_audio_path, logger=None)
	        finally:
	            # Always release the reader, even when writing fails.
	            clip.close()
	        return

	    # Fallback: use the ffmpeg CLI (most Spaces images include ffmpeg).
	    cmd = [
	        "ffmpeg", "-y", "-i", str(video_path),
	        "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
	        str(out_audio_path),
	    ]
	    try:
	        subprocess.run(
	            cmd,
	            check=True,
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	        )
	    except (OSError, subprocess.SubprocessError) as e:
	        # OSError covers a missing/non-executable ffmpeg binary;
	        # SubprocessError covers a non-zero exit status.
	        raise RuntimeError(
	            f"Impossible d'extraire l'audio — installez 'moviepy' ou vérifiez que 'ffmpeg' est disponible: {e}"
	        ) from e


	def transcribe_audio(audio_path: str, model_size: str, device: str = "cpu") -> dict:
	    """Transcribe `audio_path` with Whisper and return the raw result.

	    Loads the Whisper model named `model_size` on `device`, then hands the
	    audio file to its `transcribe` method. The model is loaded anew on
	    every call.

	    Args:
	        audio_path: path of the audio file to transcribe.
	        model_size: model name passed to `whisper.load_model`.
	        device: device string passed to `whisper.load_model`.

	    Returns:
	        Whatever `model.transcribe` returns, unchanged.
	    """
	    return whisper.load_model(model_size, device=device).transcribe(audio_path)


	# Stay safely under the per-request size limit of the Google backend
	# (documented by deep-translator as 5000 characters).
	_TRANSLATE_MAX_CHARS = 4500


	def _split_into_chunks(text: str, max_chars: int) -> list:
	    """Split `text` on whitespace into pieces of at most `max_chars` characters."""
	    chunks = []
	    current = ""
	    for word in text.split():
	        # A single token longer than the limit has to be cut arbitrarily.
	        while len(word) > max_chars:
	            if current:
	                chunks.append(current)
	                current = ""
	            chunks.append(word[:max_chars])
	            word = word[max_chars:]
	        candidate = f"{current} {word}" if current else word
	        if len(candidate) > max_chars:
	            chunks.append(current)
	            current = word
	        else:
	            current = candidate
	    if current:
	        chunks.append(current)
	    return chunks


	def translate_text(text: str, target_lang: str = "fr") -> str:
	    """Translate `text` into `target_lang` with automatic source detection.

	    Long inputs are translated piece by piece, because a single request
	    that is too large is rejected by the translator; a full video
	    transcript easily exceeds that limit. Pieces are cut on whitespace and
	    the translated pieces are joined with single spaces.

	    Args:
	        text: text to translate.
	        target_lang: language code passed to `GoogleTranslator`.

	    Returns:
	        The translated text; an empty string for blank input.
	    """
	    if not text.strip():
	        # Nothing to translate; avoid a pointless network round trip.
	        return ""

	    translator = GoogleTranslator(source="auto", target=target_lang)
	    if len(text) <= _TRANSLATE_MAX_CHARS:
	        return translator.translate(text)

	    pieces = (
	        translator.translate(chunk)
	        for chunk in _split_into_chunks(text, _TRANSLATE_MAX_CHARS)
	    )
	    # Skip pieces for which the translator returned nothing.
	    return " ".join(piece for piece in pieces if piece)


	def process_video(file, model_size, translate_to_fr, show_lang, device_choice):
	    """Run the upload -> audio -> transcription -> translation pipeline.

	    This is a generator meant for streamed Gradio updates: it yields
	    progress or error messages as `str`, then (on success) one final `dict`
	    with the keys `status`, `language`, `transcription`, `translation`,
	    `download_txt` and `download_fr`.

	    Args:
	        file: the uploaded video, either a path (`str` / `os.PathLike`) or
	            an object whose `name` attribute is the path of the upload.
	            When that path is not a file on disk, `file.read()` is used.
	        model_size: Whisper model name forwarded to `transcribe_audio`.
	        translate_to_fr: when truthy, also translate the transcript to
	            French.
	        show_lang: when truthy, report the `language` entry of the
	            transcription result.
	        device_choice: "gpu" selects CUDA; anything else runs on CPU.
	    """
	    import shutil  # local import: only this function needs it

	    # streamed updates for Gradio
	    yield "Étape 1/5 — enregistrement du fichier..."
	    tmpdir = Path(tempfile.mkdtemp(prefix="v2t_"))
	    keep_outputs = False
	    try:
	        if isinstance(file, (str, os.PathLike)):
	            source = file
	        else:
	            source = getattr(file, "name", None)
	        source_path = Path(source) if source else None
	        # Keep only the base name: joining an absolute path onto tmpdir
	        # would discard tmpdir, and writing there would truncate the
	        # uploaded file itself.
	        base_name = source_path.name if source_path is not None else ""
	        video_path = tmpdir / (base_name or "video")
	        try:
	            if source_path is not None and source_path.is_file():
	                shutil.copyfile(source_path, video_path)
	            else:
	                with open(video_path, "wb") as f:
	                    f.write(file.read())
	        except Exception as e:
	            yield f"Erreur lors de l'enregistrement du fichier: {e}"
	            return

	        yield "Étape 2/5 — extraction de l'audio (moviepy)..."
	        audio_path = tmpdir / "audio.wav"
	        try:
	            extract_audio(str(video_path), str(audio_path))
	        except Exception as e:
	            yield f"Erreur lors de l'extraction audio: {e}"
	            return

	        yield "Étape 3/5 — chargement du modèle Whisper..."
	        device = "cuda" if device_choice == "gpu" else "cpu"

	        yield f"Étape 4/5 — transcription (modèle {model_size})..."
	        try:
	            result = transcribe_audio(str(audio_path), model_size, device=device)
	        except Exception as e:
	            yield f"Erreur lors de la transcription: {e}"
	            return

	        original_text = result.get("text", "")
	        lang = result.get("language") if show_lang else None

	        status = "Terminé — transcription prête"
	        translation_placeholder = "(désactivée)"
	        french_text = None
	        if translate_to_fr:
	            yield "Étape 5/5 — traduction en français..."
	            try:
	                french_text = translate_text(original_text, "fr")
	            except Exception as e:
	                # Translation is optional: report the failure but still
	                # deliver the transcription that was already produced.
	                status = f"Erreur lors de la traduction: {e}"
	                translation_placeholder = "(échec de la traduction)"

	        # save outputs
	        txt_path = tmpdir / "transcription.txt"
	        with open(txt_path, "w", encoding="utf-8") as f:
	            f.write(original_text)

	        fr_path = None
	        if french_text is not None:
	            fr_path = tmpdir / "transcription_fr.txt"
	            with open(fr_path, "w", encoding="utf-8") as f:
	                f.write(french_text)

	        # The intermediate media can be large; only the text files have to
	        # survive for download. Removal is best-effort on purpose.
	        for leftover in (video_path, audio_path):
	            try:
	                leftover.unlink()
	            except OSError:
	                pass

	        keep_outputs = True
	        # results
	        yield {
	            "status": status,
	            "language": lang or "(non affiché)",
	            "transcription": original_text,
	            "translation": french_text or translation_placeholder,
	            "download_txt": str(txt_path),
	            "download_fr": str(fr_path) if fr_path is not None else None,
	        }
	    finally:
	        if not keep_outputs:
	            # Nothing downloadable was produced: drop the whole work dir.
	            shutil.rmtree(tmpdir, ignore_errors=True)


	# Gradio UI: inputs on the left, streamed status and results on the right.
	with gr.Blocks(title="Vidéo → Texte (Whisper) — Gradio UI") as demo:
	    gr.Markdown("""
	# 🎬 Vidéo → Transcription (Whisper)

	- Téléverse une vidéo (mp4, mkv...).
	- Choisis le `model` Whisper (tiny→large).
	- Option: traduire le texte en français.

	Remarque : le modèle `small` est un bon compromis vitesse/qualité.
	""")

	    with gr.Row():
	        with gr.Column(scale=2):
	            video_in = gr.File(label="Téléverser une vidéo")
	            model_choice = gr.Dropdown(
	                choices=["tiny", "base", "small", "medium", "large"],
	                value="small",
	                label="Modèle Whisper",
	            )
	            translate_chk = gr.Checkbox(
	                label="Traduire en français (après transcription)", value=True
	            )
	            show_lang = gr.Checkbox(label="Afficher la langue détectée", value=True)
	            device_radio = gr.Radio(
	                choices=["cpu", "gpu"], value="cpu", label="Exécution (device)"
	            )
	            run_button = gr.Button("Transcrire")

	        with gr.Column():
	            status_txt = gr.Textbox(label="Statut / Progrès", value="Prêt", lines=2)
	            lang_out = gr.Textbox(label="Langue détectée", value="", lines=1)
	            trans_out = gr.Textbox(label="Transcription (texte)", value="", lines=12)
	            trans_down = gr.File(
	                file_count="single", label="Télécharger la transcription (.txt)"
	            )
	            fr_out = gr.Textbox(label="Traduction FR (si demandée)", value="", lines=12)
	            fr_down = gr.File(
	                file_count="single", label="Télécharger la traduction (.txt)"
	            )

	    def runner(file, model_choice, translate_chk, show_lang, device_radio):
	        """Stream `process_video` updates to the six output components.

	        Yields 6-tuples in the order of the `outputs` list of the click
	        handler: status, language, transcription, transcription file,
	        translation, translation file.
	        """
	        if isinstance(file, (str, os.PathLike)):
	            has_file = bool(str(file))
	        else:
	            has_file = getattr(file, "name", None) is not None
	        if not has_file:
	            # This function is a generator, so a `return <value>` would
	            # never reach the UI: the message has to be yielded.
	            yield "Aucun fichier fourni.", "", "", None, "", None
	            return

	        for update in process_video(file, model_choice, translate_chk, show_lang, device_radio):
	            # process_video yields either status strings or the final dict
	            if isinstance(update, str):
	                # Only the status changes; text boxes keep their content.
	                yield update, gr.update(), gr.update(), None, gr.update(), None
	            elif isinstance(update, dict):
	                yield (
	                    update.get("status", "Terminé"),
	                    update.get("language", ""),
	                    update.get("transcription", ""),
	                    gr.update(value=update.get("download_txt")),
	                    update.get("translation", ""),
	                    gr.update(value=update.get("download_fr")),
	                )

	    run_button.click(
	        runner,
	        inputs=[video_in, model_choice, translate_chk, show_lang, device_radio],
	        outputs=[status_txt, lang_out, trans_out, trans_down, fr_out, fr_down],
	    )

	    gr.Markdown("---\nNotes : utilise `small` si tu es sur CPU. Pour des vidéos très longues, coupe-les en segments pour fiabiliser la mémoire.")

	if __name__ == "__main__":
	    # The click handler is a generator; Gradio 3.x only streams generator
	    # output when the queue is enabled (it is already on by default in
	    # later versions, where this call is harmless).
	    demo.queue().launch(server_name="0.0.0.0", share=False)

Xet Storage Details

Size:: 6.57 kB
Xet hash:: 6f01b6ae6c5f45f9fe7d44b3e3855dfadd5bf9e19ad494143eada33b1ffd0cd7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.