Spaces:

SixthFactorConsulting
/

AudioBot

Sleeping

App Files Files Community

Deepakkori45 committed on Feb 2, 2025

Commit

bfbed8e

verified ·

1 Parent(s): c29ce38

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -43

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from dotenv import load_dotenv
 from tempfile import NamedTemporaryFile
 import math
 from docx import Document
 # Load environment variables from .env file
 load_dotenv()
@@ -14,18 +15,128 @@ load_dotenv()
 # Set your OpenAI API key
 openai.api_key = os.getenv("OPENAI_API_KEY")
 def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
     """
     Split an audio file into chunks using silence detection.
-    Args:
-        audio_file_path (str): Path to the audio file.
-        min_silence_len (int): Minimum length of silence (in ms) required to be used as a split point.
-        silence_thresh (int): The volume (in dBFS) below which is considered silence.
-        keep_silence (int): Amount of silence (in ms) to retain at the beginning and end of each chunk.
-    Returns:
-        list: List of AudioSegment chunks.
     """
     audio = AudioSegment.from_file(audio_file_path)
     chunks = split_on_silence(
@@ -39,10 +150,11 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
 def transcribe(audio_file):
     """
     Transcribe an audio file using the OpenAI Whisper model.
     Args:
         audio_file (str): Path to the audio file.
     Returns:
         str: Transcribed text.
     """
@@ -51,17 +163,17 @@ def transcribe(audio_file):
             model="whisper-1",
             file=audio,
             response_format="text",
-            language="en"  # Ensures transcription is in English
         )
     return response
 def process_audio_chunks(audio_chunks):
     """
     Process and transcribe each audio chunk.
     Args:
         audio_chunks (list): List of AudioSegment chunks.
     Returns:
         str: Combined transcription from all chunks.
     """
@@ -88,29 +200,12 @@ def process_audio_chunks(audio_chunks):
 def save_transcription_to_docx(transcription, audio_file_path):
     """
     Save the transcription as a .docx file.
-    Args:
-        transcription (str): Transcribed text.
-        audio_file_path (str): Path to the original audio file for naming purposes.
-    Returns:
-        str: Path to the saved .docx file.
     """
-    # Extract the base name of the audio file (without extension)
     base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
-    # Create a new file name by appending "_full_transcription" with .docx extension
     output_file_name = f"{base_name}_full_transcription.docx"
-    # Create a new Document object
     doc = Document()
-    # Add the transcription text to the document
     doc.add_paragraph(transcription)
-    # Save the document in .docx format
     doc.save(output_file_name)
     return output_file_name
 st.title("Audio Transcription with OpenAI's Whisper")
@@ -126,36 +221,32 @@ if uploaded_file is not None and st.session_state.transcription is None:
     # Save uploaded file temporarily
     file_extension = uploaded_file.name.split(".")[-1]
-    original_file_name = uploaded_file.name.rsplit('.', 1)[0]  # Get original file name without extension
     temp_audio_file = f"temp_audio_file.{file_extension}"
     with open(temp_audio_file, "wb") as f:
         f.write(uploaded_file.getbuffer())
-    # Split and process audio using silence detection
     with st.spinner('Transcribing...'):
         audio_chunks = split_audio_on_silence(
             temp_audio_file,
-            min_silence_len=500,  # adjust based on your audio characteristics
-            silence_thresh=-40,   # adjust based on the ambient noise level
-            keep_silence=250      # optional: keeps a bit of silence at the edges
         )
         transcription = process_audio_chunks(audio_chunks)
         if transcription:
             st.session_state.transcription = transcription
             st.success('Transcription complete!')
-            # Save transcription to a Word (.docx) file
             output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
             st.session_state.output_docx_file = output_docx_file
-    # Clean up temporary file
     if os.path.exists(temp_audio_file):
         os.remove(temp_audio_file)
 if st.session_state.transcription:
     st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
-    # Download the transcription as a .docx file
     with open(st.session_state.output_docx_file, "rb") as docx_file:
         st.download_button(
             label="Download Transcription (.docx)",

 from tempfile import NamedTemporaryFile
 import math
 from docx import Document
+import time
 # Load environment variables from .env file
 load_dotenv()
 # Set your OpenAI API key
 openai.api_key = os.getenv("OPENAI_API_KEY")
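+# Dictionary mapping language display names to the language codes passed to Whisper.
+# NOTE: this list is broader than Whisper's documented language set (e.g. Cebuano, Corsican,
+# Esperanto, Xhosa and Zulu do not appear in it), some codes are not two-letter ISO 639-1
+# codes ("ceb", "haw", "hmn"), and "Chichewa" and "Nyanja" both map to "ny" -- verify
+# entries against the Whisper documentation before relying on them.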
+languages = {
+    "Afrikaans": "af",
+    "Albanian": "sq",
+    "Amharic": "am",
+    "Arabic": "ar",
+    "Armenian": "hy",
+    "Assamese": "as",
+    "Azerbaijani": "az",
+    "Basque": "eu",
+    "Belarusian": "be",
+    "Bengali": "bn",
+    "Bosnian": "bs",
+    "Bulgarian": "bg",
+    "Burmese": "my",
+    "Catalan": "ca",
+    "Cebuano": "ceb",
+    "Chichewa": "ny",
+    "Chinese": "zh",
+    "Corsican": "co",
+    "Croatian": "hr",
+    "Czech": "cs",
+    "Danish": "da",
+    "Dutch": "nl",
+    "English": "en",
+    "Esperanto": "eo",
+    "Estonian": "et",
+    "Filipino": "tl",
+    "Finnish": "fi",
+    "French": "fr",
+    "Frisian": "fy",
+    "Galician": "gl",
+    "Georgian": "ka",
+    "German": "de",
+    "Greek": "el",
+    "Gujarati": "gu",
+    "Haitian Creole": "ht",
+    "Hausa": "ha",
+    "Hawaiian": "haw",
+    "Hebrew": "he",
+    "Hindi": "hi",
+    "Hmong": "hmn",
+    "Hungarian": "hu",
+    "Icelandic": "is",
+    "Igbo": "ig",
+    "Indonesian": "id",
+    "Irish": "ga",
+    "Italian": "it",
+    "Japanese": "ja",
+    "Javanese": "jw",
+    "Kannada": "kn",
+    "Kazakh": "kk",
+    "Khmer": "km",
+    "Kinyarwanda": "rw",
+    "Korean": "ko",
+    "Kurdish": "ku",
+    "Kyrgyz": "ky",
+    "Lao": "lo",
+    "Latin": "la",
+    "Latvian": "lv",
+    "Lithuanian": "lt",
+    "Luxembourgish": "lb",
+    "Macedonian": "mk",
+    "Malagasy": "mg",
+    "Malay": "ms",
+    "Malayalam": "ml",
+    "Maltese": "mt",
+    "Maori": "mi",
+    "Marathi": "mr",
+    "Mongolian": "mn",
+    "Nepali": "ne",
+    "Norwegian": "no",
+    "Nyanja": "ny",
+    "Odia": "or",
+    "Pashto": "ps",
+    "Persian": "fa",
+    "Polish": "pl",
+    "Portuguese": "pt",
+    "Punjabi": "pa",
+    "Romanian": "ro",
+    "Russian": "ru",
+    "Samoan": "sm",
+    "Scots Gaelic": "gd",
+    "Serbian": "sr",
+    "Sesotho": "st",
+    "Shona": "sn",
+    "Sindhi": "sd",
+    "Sinhala": "si",
+    "Slovak": "sk",
+    "Slovenian": "sl",
+    "Somali": "so",
+    "Spanish": "es",
+    "Sundanese": "su",
+    "Swahili": "sw",
+    "Swedish": "sv",
+    "Tajik": "tg",
+    "Tamil": "ta",
+    "Tatar": "tt",
+    "Telugu": "te",
+    "Thai": "th",
+    "Turkish": "tr",
+    "Turkmen": "tk",
+    "Ukrainian": "uk",
+    "Urdu": "ur",
+    "Uyghur": "ug",
+    "Uzbek": "uz",
+    "Vietnamese": "vi",
+    "Welsh": "cy",
+    "Xhosa": "xh",
+    "Yiddish": "yi",
+    "Yoruba": "yo",
+    "Zulu": "zu"
+}
+# Create a selectbox for language selection; default is English.
+selected_lang_name = st.selectbox("Select transcription language", sorted(languages.keys()), index=sorted(languages.keys()).index("English"))
+selected_language = languages[selected_lang_name]
 def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
     """
     Split an audio file into chunks using silence detection.
     """
     audio = AudioSegment.from_file(audio_file_path)
     chunks = split_on_silence(
 def transcribe(audio_file):
     """
     Transcribe an audio file using the OpenAI Whisper model.
+    This uses the OpenAI API with the forced language set to the selected language.
     Args:
         audio_file (str): Path to the audio file.
     Returns:
         str: Transcribed text.
     """
             model="whisper-1",
             file=audio,
             response_format="text",
+            language=selected_language  # Use the selected language code
         )
     return response
 def process_audio_chunks(audio_chunks):
     """
     Process and transcribe each audio chunk.
     Args:
         audio_chunks (list): List of AudioSegment chunks.
     Returns:
         str: Combined transcription from all chunks.
     """
 def save_transcription_to_docx(transcription, audio_file_path):
     """
     Write the transcription into a new .docx document and return its file name.
     Args:
         transcription (str): Text added to the document as a single paragraph.
         audio_file_path (str): Path or name of the source audio; its base name
             (directory and extension stripped) prefixes the output file name.
     Returns:
         str: Name of the saved file, "<base>_full_transcription.docx"
             (a bare file name with no directory component).
     """
     # Drop any directory part first, then strip the extension.
     audio_name = os.path.basename(audio_file_path)
     stem, _extension = os.path.splitext(audio_name)
     docx_name = f"{stem}_full_transcription.docx"
     # The whole transcription goes into one paragraph of a fresh document.
     document = Document()
     document.add_paragraph(transcription)
     document.save(docx_name)
     return docx_name
 st.title("Audio Transcription with OpenAI's Whisper")
     # Save uploaded file temporarily
     file_extension = uploaded_file.name.split(".")[-1]
+    original_file_name = uploaded_file.name.rsplit('.', 1)[0]
     temp_audio_file = f"temp_audio_file.{file_extension}"
     with open(temp_audio_file, "wb") as f:
         f.write(uploaded_file.getbuffer())
+    processing_start = time.time()
     with st.spinner('Transcribing...'):
         audio_chunks = split_audio_on_silence(
             temp_audio_file,
+            min_silence_len=500,
+            silence_thresh=-40,
+            keep_silence=250
         )
         transcription = process_audio_chunks(audio_chunks)
         if transcription:
             st.session_state.transcription = transcription
             st.success('Transcription complete!')
             output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
             st.session_state.output_docx_file = output_docx_file
+    processing_duration = time.time() - processing_start
+    st.info(f"Total processing time: {processing_duration:.2f} seconds.")
     if os.path.exists(temp_audio_file):
         os.remove(temp_audio_file)
 if st.session_state.transcription:
     st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
     with open(st.session_state.output_docx_file, "rb") as docx_file:
         st.download_button(
             label="Download Transcription (.docx)",