kamranferoz committed on
Commit
5bd4c84
·
1 Parent(s): 532c1b7

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. 00_a2t.py +122 -0
  2. README.md +3 -9
  3. requirements.txt +112 -0
00_a2t.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile
import time
import warnings

import gradio as gr
import whisper
from gtts import gTTS
from pyChatGPT import ChatGPT
from pydub import AudioSegment
9
+
10
+
11
+ warnings.filterwarnings("ignore")
12
+ model = whisper.load_model("base")
13
+
14
+
15
def transcribe_long_audio(model, audio_path, segment_length=30 * 1000):
    """Transcribe an audio file of arbitrary length with Whisper.

    The file is split into fixed-size chunks (Whisper decodes 30-second
    windows) and the per-chunk transcriptions are concatenated.

    Args:
        model: A loaded Whisper model.
        audio_path: Path to the uploaded audio file.
        segment_length: Chunk size in milliseconds (default 30 s).

    Returns:
        The concatenated transcription text (empty string for empty audio).
    """
    # from_file auto-detects the container format, so non-WAV uploads
    # (mp3, m4a, ...) work too; from_wav only handled WAV files.
    audio = AudioSegment.from_file(audio_path)

    transcription = ''
    for start in range(0, len(audio), segment_length):
        chunk = audio[start:start + segment_length]
        transcription += transcribe(model, chunk)

    return transcription
24
+
25
# Removed: leftover body of an earlier transcribe() draft. Only the `def`
# line had been commented out, so the body statements ran at module level
# and raised NameError on the undefined `audio_segment` at import time.
46
+
47
+
48
# Removed: second leftover draft of transcribe(). As above, the `def` line
# was commented out but the body was not, so it executed at import time and
# crashed with NameError on `audio_segment`.
73
+
74
+ # def transcribe(model, audio_segment):
75
+
76
def transcribe(model, audio_segment):
    """Transcribe a single pydub AudioSegment with Whisper.

    Args:
        model: A loaded Whisper model.
        audio_segment: A pydub AudioSegment chunk (<= ~30 s expected).

    Returns:
        The decoded text, or "" when the segment is missing or empty.
    """
    if audio_segment is None or len(audio_segment) == 0:
        print("No audio data received. Cannot proceed with transcription.")
        return ""

    # Write the segment to a real scratch file. mkstemp + explicit cleanup
    # is portable: NamedTemporaryFile(delete=True) cannot be reopened by
    # name on Windows while the handle is open, and the original code never
    # closed the handle at all.
    fd, temp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_segment.export(temp_path, format="wav")

        loaded_audio = whisper.load_audio(temp_path)
        print("Audio Loaded")  # Debugging print statement

        # Whisper decodes fixed 30-second windows; pad or trim to fit.
        padded_trimmed_audio = whisper.pad_or_trim(loaded_audio)

        # make log-Mel spectrogram and move to the same device as the model
        mel = whisper.log_mel_spectrogram(padded_trimmed_audio).to(model.device)

        # decode the audio; fp16=False keeps decoding working on CPU-only hosts
        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(model, mel, options)
        print("Decoded Audio")

        result_text = result.text
        print("Transcription: ", result_text)
        return result_text
    finally:
        # Always remove the scratch file, even if export/decoding fails.
        os.remove(temp_path)
105
+
106
+
107
# Single text box in which the transcription result is displayed.
output_1 = gr.Textbox(label="Speech to Text")
108
+
109
+
110
def transcribe_wrapper(audio_path):
    """Gradio callback: run chunked transcription with the global model."""
    text = transcribe_long_audio(model, audio_path)
    return text
112
+
113
# Build and launch the UI. gr.Audio replaces the deprecated gr.inputs.Audio
# namespace, and allow_flagging takes the string "never" rather than the
# boolean False in Gradio 3.x.
gr.Interface(
    title='Voice to Text (KF)',
    fn=transcribe_wrapper,
    inputs=[
        gr.Audio(source="upload", type="filepath")
    ],
    outputs=[
        output_1
    ],
    live=True,
    allow_flagging="never",
).launch(share=True)
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: A2t
3
- emoji: 🐨
4
- colorFrom: indigo
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 3.44.3
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: a2t
3
+ app_file: 00_a2t.py
 
 
4
  sdk: gradio
5
+ sdk_version: 3.42.0
 
 
6
  ---
 
 
requirements.txt ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ altair==5.1.1
5
+ anyio==3.7.1
6
+ async-timeout==4.0.3
7
+ attrs==23.1.0
8
+ beautifulsoup4==4.12.2
9
+ certifi==2023.7.22
10
+ charset-normalizer==3.2.0
11
+ click==8.1.7
12
+ contourpy==1.1.0
13
+ cycler==0.11.0
14
+ dacite==1.8.1
15
+ decorator==4.4.2
16
+ exceptiongroup==1.1.3
17
+ fastapi==0.103.1
18
+ ffmpy==0.3.1
19
+ filelock==3.12.3
20
+ fonttools==4.42.1
21
+ frozenlist==1.4.0
22
+ fsspec==2023.9.0
23
+ gradio==3.42.0
24
+ gradio_client==0.5.0
25
+ gTTS==2.3.2
26
+ h11==0.14.0
27
+ htmlmin==0.1.12
28
+ httpcore==0.17.3
29
+ httpx==0.24.1
30
+ huggingface-hub==0.16.4
31
+ idna==3.4
32
+ ImageHash==4.3.1
33
+ imageio==2.31.3
34
+ imageio-ffmpeg==0.4.9
35
+ importlib-resources==6.0.1
36
+ Jinja2==3.1.2
37
+ joblib==1.3.2
38
+ jsonschema==4.19.0
39
+ jsonschema-specifications==2023.7.1
40
+ kiwisolver==1.4.5
41
+ llvmlite==0.40.1
42
+ markdownify==0.11.6
43
+ MarkupSafe==2.1.3
44
+ matplotlib==3.7.2
45
+ more-itertools==10.1.0
46
+ moviepy==1.0.3
47
+ mpmath==1.3.0
48
+ multidict==6.0.4
49
+ multimethod==1.9.1
50
+ networkx==3.1
51
+ numba==0.57.1
52
+ numpy==1.23.5
53
+ openai==0.28.0
54
+ openai-whisper @ git+https://github.com/openai/whisper.git@e8622f9afc4eba139bf796c210f5c01081000472
55
+ orjson==3.9.5
56
+ outcome==1.2.0
57
+ packaging==23.1
58
+ pandas==2.0.3
59
+ pandas-profiling==3.6.6
60
+ patsy==0.5.3
61
+ phik==0.12.3
62
+ Pillow==10.0.0
63
+ proglog==0.1.10
64
+ pyChatGPT==0.4.3.3
65
+ pydantic==1.10.12
66
+ pydub==0.25.1
67
+ pyparsing==3.0.9
68
+ PySocks==1.7.1
69
+ python-dateutil==2.8.2
70
+ python-dotenv==1.0.0
71
+ python-multipart==0.0.6
72
+ pytube3==9.6.4
73
+ pytz==2023.3
74
+ PyWavelets==1.4.1
75
+ PyYAML==6.0.1
76
+ referencing==0.30.2
77
+ regex==2023.8.8
78
+ requests==2.31.0
79
+ rpds-py==0.10.2
80
+ scipy==1.11.2
81
+ seaborn==0.12.2
82
+ selenium==4.12.0
83
+ semantic-version==2.10.0
84
+ six==1.16.0
85
+ sniffio==1.3.0
86
+ sortedcontainers==2.4.0
87
+ soupsieve==2.5
88
+ SpeechRecognition==3.10.0
89
+ starlette==0.27.0
90
+ statsmodels==0.14.0
91
+ sympy==1.12
92
+ tangled-up-in-unicode==0.2.0
93
+ tiktoken==0.3.3
94
+ toolz==0.12.0
95
+ torch==2.0.1
96
+ tqdm==4.66.1
97
+ trio==0.22.2
98
+ trio-websocket==0.10.4
99
+ typeguard==2.13.3
100
+ typing_extensions==4.7.1
101
+ tzdata==2023.3
102
+ undetected-chromedriver==3.5.3
103
+ urllib3==2.0.4
104
+ uvicorn==0.23.2
105
+ visions==0.7.5
106
+ websockets==11.0.3
107
+ whisper==1.1.10
108
+ wordcloud==1.9.2
109
+ wsproto==1.2.0
110
+ yarl==1.9.2
111
+ ydata-profiling==4.5.1
112
+ zipp==3.16.2