Spaces:
Build error
Build error
Reproduce Vidtext with distilled whisper
Browse files
app.py
CHANGED
|
@@ -4,25 +4,17 @@ from transformers import pipeline
|
|
| 4 |
from pytube import YouTube
|
| 5 |
from pydub import AudioSegment
|
| 6 |
from audio_extract import extract_audio
|
| 7 |
-
import google.generativeai as google_genai
|
| 8 |
-
|
| 9 |
import os
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
load_dotenv()
|
| 15 |
|
| 16 |
-
GOOGLE_API_KEY =os.getenv("GOOGLE_API_KEY")
|
| 17 |
-
|
| 18 |
-
google_genai.configure(api_key=GOOGLE_API_KEY)
|
| 19 |
-
|
| 20 |
st.set_page_config(
|
| 21 |
-
page_title="
|
| 22 |
)
|
| 23 |
|
| 24 |
-
st.title('
|
| 25 |
-
st.write('A web app for video/audio transcription(Youtube, mp4, mp3)')
|
| 26 |
|
| 27 |
|
| 28 |
def youtube_video_downloader(url):
|
|
@@ -57,7 +49,7 @@ def audio_processing(mp3_audio):
|
|
| 57 |
|
| 58 |
@st.cache_resource
|
| 59 |
def load_asr_model():
|
| 60 |
-
asr_model = pipeline(task="automatic-speech-recognition", model="
|
| 61 |
return asr_model
|
| 62 |
|
| 63 |
transcriber_model = load_asr_model()
|
|
@@ -66,11 +58,6 @@ def transcriber_pass(processed_audio):
|
|
| 66 |
text_extract = transcriber_model(processed_audio)
|
| 67 |
return text_extract['text']
|
| 68 |
|
| 69 |
-
def generate_ai_summary(transcript):
|
| 70 |
-
model = google_genai.GenerativeModel('gemini-pro')
|
| 71 |
-
model_response = model.generate_content([f"Give a summary of the text {transcript}"], stream=True)
|
| 72 |
-
return model_response.text
|
| 73 |
-
|
| 74 |
|
| 75 |
|
| 76 |
# Streamlit UI
|
|
|
|
| 4 |
from pytube import YouTube
|
| 5 |
from pydub import AudioSegment
|
| 6 |
from audio_extract import extract_audio
|
|
|
|
|
|
|
| 7 |
import os
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
|
|
|
|
|
|
|
| 10 |
load_dotenv()
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
st.set_page_config(
|
| 13 |
+
page_title="VidText_distilled"
|
| 14 |
)
|
| 15 |
|
| 16 |
+
st.title('Vidtext_distilwhisper')
|
| 17 |
+
st.write('A web app for video/audio transcription(Youtube, mp4, mp3). Using distilled Whisper')
|
| 18 |
|
| 19 |
|
| 20 |
def youtube_video_downloader(url):
|
|
|
|
| 49 |
|
| 50 |
@st.cache_resource
|
| 51 |
def load_asr_model():
|
| 52 |
+
asr_model = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-large-v3")
|
| 53 |
return asr_model
|
| 54 |
|
| 55 |
transcriber_model = load_asr_model()
|
|
|
|
| 58 |
text_extract = transcriber_model(processed_audio)
|
| 59 |
return text_extract['text']
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
# Streamlit UI
|