Spaces:

olanigan
/

YoutubeAssistant

Sleeping

App Files Community

Ibrahim Olanigan committed on Oct 31, 2023

Commit

c6097eb

1 Parent(s): 5b15a61

Resolved page state

Browse files

Files changed (1) hide show

app.py +48 -35

app.py CHANGED Viewed

@@ -5,27 +5,30 @@ import whisper
 URL = 'URL'
 TEXT = 'TEXT'
-WHISPER = 'WHISPER'
 PROCESSING = 'PROCESSING'
-STATES = [ TEXT, WHISPER, PROCESSING]
-AUDIO_FILE = "audio.mp3"
-TRANSCRIPT = "transcript.txt"
 AUDIO_EXISTS = "AUDIO_EXISTS"
 TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"
 model = ''
 st.title('Youtube Assistant')
 def init_state():
     for state in STATES:
-        st.session_state[state] = ''
-    if AUDIO_EXISTS not in st.session_state:
-        st.session_state[AUDIO_EXISTS] = False
-        st.session_state[TRANSCRIPT_EXISTS] = False
-    if URL not in st.session_state or not st.session_state[URL]:
-        clear_old_files()
 def clear_old_files():
     print("Clearing old files")
@@ -36,71 +39,81 @@ def clear_old_files():
     #Refresh audio state
     check_audio()
 def load_whisper():
     check_audio()
     model = whisper.load_model("small")
     print('Loaded Whisper Medium model')
     if st.session_state[AUDIO_EXISTS]:
-        print('Transcribing with Whisper model')
         result = model.transcribe("audio.mp3")
-        st.session_state[WHISPER] = result["text"]
-        write_file(result["text"], "transcript.txt")
         check_audio()
-        print(f"Transcribe results: {result.keys()}")
         write_file(str(result["segments"]), "segments.txt")
 def check_audio():
     st.session_state[AUDIO_EXISTS] = os.path.exists(AUDIO_FILE)
     st.session_state[TRANSCRIPT_EXISTS] = os.path.exists(TRANSCRIPT)
 def load_audio():
-    if st.session_state[AUDIO_EXISTS]:
         audio_file = open(AUDIO_FILE, 'rb')
         audio_bytes = audio_file.read()
-        print(f"Audio file exists...{len(audio_bytes)}")
         st.audio(audio_bytes, format="audio/mp3")
 def display():
     container = st.container()
     text_container = st.container()
     with container:
         with st.form(key='input_form', clear_on_submit=False):
-            user_input = st.text_input("Youtube URL:", placeholder="https://www.youtube.com/watch?v=rdONCqZoUZE", key=URL)
             input_submit_button = st.form_submit_button(label='Send')
-        load_audio()
     if input_submit_button and user_input:
         clear_old_files()
         with st.spinner('Downloading Audio...'):
             download()
             load_audio()
         with st.spinner('Transcribing Audio...'):
-            load_whisper()
     with text_container:
-       st.text_area(label="Youtube Transcript:",
                     height=200,
-                    value=st.session_state[WHISPER])
         #Download Button section
     col1, col2 = st.columns(2)
     with col1:
-        if st.session_state[AUDIO_EXISTS]:
-            st.download_button("Download Audio","file","audio.mp3","application/octet-stream")
     with col2:
-        if os.path.exists("transcript.txt"):
-            st.download_button("Download Transcript",st.session_state[TEXT],"transcript.txt","text/plain")
 def download():
   command = [f"yt-dlp --no-config -v --extract-audio --audio-format mp3 {st.session_state[URL]} -o audio.mp3"]
   print(command)
-  out = subprocess.run(command, shell=True)
   check_audio()
 def write_file(text, filename):

 URL = 'URL'
 TEXT = 'TEXT'
+TITLE = 'TITLE'
 PROCESSING = 'PROCESSING'
 AUDIO_EXISTS = "AUDIO_EXISTS"
 TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"
+STATES = [ TEXT, TITLE]
+BOOL_STATES = [ AUDIO_EXISTS, TRANSCRIPT_EXISTS, PROCESSING]
+AUDIO_FILE = "audio.mp3"
+TRANSCRIPT = "transcript.txt"
 model = ''
 st.title('Youtube Assistant')
 def init_state():
+    # print("Page refreshed")
     for state in STATES:
+        if state not in st.session_state:
+            st.session_state[state] = ''
+    for state in BOOL_STATES:
+        if state not in st.session_state:
+            st.session_state[state] = False
 def clear_old_files():
     print("Clearing old files")
     #Refresh audio state
     check_audio()
+@st.cache_data
 def load_whisper():
     check_audio()
     model = whisper.load_model("small")
     print('Loaded Whisper Medium model')
+    return model
+def transcribe():
     if st.session_state[AUDIO_EXISTS]:
+        model = load_whisper()
         result = model.transcribe("audio.mp3")
+        text = result["text"]
+        st.session_state[TEXT] = text
+        print(f"Start - { text[:100]}")
+        print(f"End - { text[-100:]}")
+        write_file(text, "transcript.txt")
         check_audio()
         write_file(str(result["segments"]), "segments.txt")
+        return text
 def check_audio():
     st.session_state[AUDIO_EXISTS] = os.path.exists(AUDIO_FILE)
     st.session_state[TRANSCRIPT_EXISTS] = os.path.exists(TRANSCRIPT)
 def load_audio():
+    if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
         audio_file = open(AUDIO_FILE, 'rb')
         audio_bytes = audio_file.read()
         st.audio(audio_bytes, format="audio/mp3")
 def display():
     container = st.container()
     text_container = st.container()
     with container:
         with st.form(key='input_form', clear_on_submit=False):
+            user_input = st.text_input("Youtube URL:", placeholder="https://www.youtube.com", key=URL)
             input_submit_button = st.form_submit_button(label='Send')
     if input_submit_button and user_input:
+        st.session_state[PROCESSING] = True
         clear_old_files()
         with st.spinner('Downloading Audio...'):
             download()
             load_audio()
         with st.spinner('Transcribing Audio...'):
+            transcribe()
+        st.session_state[PROCESSING] = False
     with text_container:
+       st.text_area(label=f"Youtube Transcript: {st.session_state[TITLE]}",
                     height=200,
+                    value=st.session_state[TEXT])
         #Download Button section
     col1, col2 = st.columns(2)
     with col1:
+        if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
+            with open("audio.mp3", "rb") as f:
+                data = f.read()
+                st.download_button('Download MP3', data,"audio.mp3")
     with col2:
+        if st.session_state[TRANSCRIPT_EXISTS]:
+            st.download_button("Download Transcript",st.session_state[TEXT],"transcript.txt")
 def download():
+  #Get youtube title
+  text = subprocess.run(["yt-dlp", "--get-title", st.session_state[URL]], capture_output=True)
+  st.session_state[TITLE] = text.stdout.decode("utf-8").strip()
+  # Download and convert audio
   command = [f"yt-dlp --no-config -v --extract-audio --audio-format mp3 {st.session_state[URL]} -o audio.mp3"]
   print(command)
+  subprocess.run(command, shell=True)
   check_audio()
 def write_file(text, filename):