|
|
import streamlit as st |
|
|
from transformers import pipeline |
|
|
from diffusers import StableDiffusionPipeline |
|
|
import torch |
|
|
import os |
|
|
import tempfile |
|
|
|
|
|
|
|
|
# --- Model loading -----------------------------------------------------------
# Streamlit re-executes this script top-to-bottom on every widget interaction.
# Loading the three models at module level therefore reloads them on every
# click, which is extremely slow and memory-hungry.  @st.cache_resource makes
# each model load exactly once per server process and reuses it across reruns
# and sessions.  The module-level names are kept identical so the helper
# functions below (transcribe / summarize / generate_image) work unchanged.


@st.cache_resource
def _load_asr_pipeline():
    """Load the wav2vec2 speech-recognition pipeline once per process."""
    return pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-960h")


@st.cache_resource
def _load_summarizer_pipeline():
    """Load the BART summarization pipeline once per process."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@st.cache_resource
def _load_image_pipeline(target_device):
    """Load Stable Diffusion once per process and move it to *target_device*."""
    return StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(target_device)


# Prefer the GPU when one is available; diffusion on CPU is very slow.
device = "cuda" if torch.cuda.is_available() else "cpu"

asr_pipeline = _load_asr_pipeline()
summarizer_pipeline = _load_summarizer_pipeline()
image_pipeline = _load_image_pipeline(device)
|
|
|
|
|
|
|
|
def transcribe(audio_path):
    """Transcribe the audio file at *audio_path* to text.

    Runs the module-level ASR pipeline on the file and returns the
    recognized text (the pipeline's "text" field).
    """
    result = asr_pipeline(audio_path)
    return result["text"]
|
|
|
|
|
|
|
|
def summarize(transcription):
    """Summarize *transcription* with the module-level BART pipeline.

    Uses deterministic (greedy) decoding with a summary length bounded
    between 30 and 130 tokens, and returns the summary text of the first
    (only) result.
    """
    outputs = summarizer_pipeline(
        transcription,
        max_length=130,
        min_length=30,
        do_sample=False,
    )
    return outputs[0]["summary_text"]
|
|
|
|
|
|
|
|
def generate_image(summary):
    """Generate an image from the *summary* text prompt.

    Feeds the summary to the module-level Stable Diffusion pipeline and
    returns the first generated PIL image.
    """
    generation = image_pipeline(summary)
    return generation.images[0]
|
|
|
|
|
|
|
|
# --- UI flow -----------------------------------------------------------------
# BUG FIX: the original stored `transcription` and `summary` in plain local
# variables.  Streamlit reruns the whole script on every button click, so by
# the time the user pressed "Résumer la transcription" the transcribe button
# was no longer pressed, `transcription` was never assigned, and
# summarize(transcription) raised NameError (likewise for the image step).
# Results are now persisted in st.session_state so each step can build on the
# previous one across reruns.

st.title("Générateur de diapositives intelligent")

audio_file = st.file_uploader("Téléchargez un fichier audio", type=["wav", "mp3"])

if audio_file is not None:
    # The ASR pipeline expects a file path, so persist the upload to disk.
    # Keep the original extension so the audio decoder can detect the format.
    suffix = os.path.splitext(audio_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(audio_file.read())
        tmp_filename = tmp_file.name

    try:
        st.audio(audio_file, format='audio/wav')

        if st.button("Transcrire l'audio"):
            st.session_state["transcription"] = transcribe(tmp_filename)

        # Re-display the transcription on every rerun once it exists.
        if "transcription" in st.session_state:
            st.text_area("Transcription", st.session_state["transcription"])

        if st.button("Résumer la transcription"):
            if "transcription" in st.session_state:
                st.session_state["summary"] = summarize(st.session_state["transcription"])
            else:
                st.warning("Veuillez d'abord transcrire l'audio.")

        if "summary" in st.session_state:
            st.text_area("Résumé", st.session_state["summary"])

        if st.button("Générer une image basée sur le résumé"):
            if "summary" in st.session_state:
                image = generate_image(st.session_state["summary"])
                st.image(image, caption="Image générée à partir du résumé")
            else:
                st.warning("Veuillez d'abord résumer la transcription.")
    finally:
        # Always remove the temp file, even if a pipeline step raised.
        os.remove(tmp_filename)
|
|
|