Spaces:
Build error
Build error
Vijayanand Sankarasubramanian committed on
Commit ·
85463e8
1
Parent(s): 48d0b40
added wav2vec based transcription
Browse files- .gitignore +1 -0
- app.py +38 -24
- tools/transcribe.py +33 -34
.gitignore
CHANGED
|
@@ -182,3 +182,4 @@ cache
|
|
| 182 |
flagged
|
| 183 |
*.rtf
|
| 184 |
*.mp3
|
|
|
|
|
|
| 182 |
flagged
|
| 183 |
*.rtf
|
| 184 |
*.mp3
|
| 185 |
+
*.txt
|
app.py
CHANGED
|
@@ -3,7 +3,7 @@ from helpers.model_utils import GPT3, GPT4, LLAMA3, ANTHROPIC2, set_question_ans
|
|
| 3 |
from tools.summarize import MAPREDUCE, STUFF, summarize_podcast
|
| 4 |
from tools.answer_bot import answer_question
|
| 5 |
from tools.aspect_and_sentiment_extraction import extract_aspects_and_sentiment
|
| 6 |
-
from tools.transcribe import transcribe_podcast, transcribe_podcast_from_mp3
|
| 7 |
|
| 8 |
def get_answer_for(user_question, transcript_file_name, question_answer_llm_choice):
|
| 9 |
if transcript_file_name is None:
|
|
@@ -42,13 +42,13 @@ def generate_aspects_and_sentiments(transcript_file_name, sentiment_analysis_llm
|
|
| 42 |
|
| 43 |
return sentiment, transcript_file_name, sentiment_analysis_llm_choice
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
|
| 53 |
def setup_summarization_llm(choice, summarization_llm_choice):
|
| 54 |
set_summarization_llm(choice)
|
|
@@ -69,27 +69,31 @@ def setup_summarization_method(choice, summarization_method):
|
|
| 69 |
summarization_method = choice
|
| 70 |
return choice, summarization_method
|
| 71 |
|
| 72 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
if not uploaded_file:
|
| 74 |
-
|
| 75 |
else:
|
| 76 |
-
transcript_file_name = transcribe_podcast_from_mp3(uploaded_file.name)
|
| 77 |
-
|
| 78 |
-
return
|
| 79 |
|
| 80 |
-
def download_and_transcribe_podcast(mp3_url, transcript_file,
|
| 81 |
if not mp3_url:
|
| 82 |
-
|
| 83 |
else:
|
| 84 |
-
transcript_file = transcribe_podcast(mp3_url)
|
| 85 |
-
|
| 86 |
-
return
|
| 87 |
-
|
| 88 |
|
| 89 |
summarization_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
| 90 |
question_answer_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
| 91 |
sentiment_analysis_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
| 92 |
summarize_method_choices = [MAPREDUCE, STUFF]
|
|
|
|
| 93 |
|
| 94 |
with gr.Blocks() as demo:
|
| 95 |
transcript_file = gr.State()
|
|
@@ -97,18 +101,28 @@ with gr.Blocks() as demo:
|
|
| 97 |
question_answer_llm_choice = gr.State()
|
| 98 |
sentiment_analysis_llm_choice = gr.State()
|
| 99 |
summarization_llm_choice = gr.State()
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
with gr.Row():
|
| 102 |
with gr.Group("Enter Podcast mp3 URL"):
|
| 103 |
mp3_url = gr.Textbox(label="Podcast MP3 URL")
|
| 104 |
submit_button = gr.Button("Transcribe")
|
| 105 |
-
|
|
|
|
| 106 |
with gr.Group("Upload Podcast mp3 File"):
|
| 107 |
mp3_file = gr.File(label="Podcast mp3 file")
|
| 108 |
submit_button = gr.Button("Transcribe")
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
| 112 |
with gr.Group("LLM Selection"):
|
| 113 |
with gr.Row():
|
| 114 |
choice = gr.Radio(label="Summarization LLM", choices=summarization_llm_choices)
|
|
|
|
| 3 |
from tools.summarize import MAPREDUCE, STUFF, summarize_podcast
|
| 4 |
from tools.answer_bot import answer_question
|
| 5 |
from tools.aspect_and_sentiment_extraction import extract_aspects_and_sentiment
|
| 6 |
+
from tools.transcribe import transcribe_podcast, transcribe_podcast_from_mp3, WAV2VEC, AUTOMODELFORSPEECH
|
| 7 |
|
| 8 |
def get_answer_for(user_question, transcript_file_name, question_answer_llm_choice):
|
| 9 |
if transcript_file_name is None:
|
|
|
|
| 42 |
|
| 43 |
return sentiment, transcript_file_name, sentiment_analysis_llm_choice
|
| 44 |
|
| 45 |
+
def setup_transcript_file_handle(uploaded_file, transcript_file_name, transcription_status=None):
    """Register an uploaded, already-transcribed file as the active transcript.

    Args:
        uploaded_file: Upload object exposing a ``name`` attribute, or a falsy
            value when nothing was uploaded.
        transcript_file_name: Current transcript name; returned unchanged when
            nothing was uploaded.
        transcription_status: Never read, only overwritten. It defaults to
            ``None`` so the handler can also be wired with just the file and
            transcript inputs.

    Returns:
        A ``(status message, transcript file name)`` tuple, in that order.
    """
    if not uploaded_file:
        return "No File Detected, Failure", transcript_file_name
    return "Upload Success", uploaded_file.name
|
| 52 |
|
| 53 |
def setup_summarization_llm(choice, summarization_llm_choice):
|
| 54 |
set_summarization_llm(choice)
|
|
|
|
| 69 |
summarization_method = choice
|
| 70 |
return choice, summarization_method
|
| 71 |
|
| 72 |
+
def setup_transcription_method(choice, transcription_method):
    """Return the selected transcription method twice.

    The incoming ``transcription_method`` value is discarded; the selection
    replaces it. The first element is the value to display, the second the
    new stored method.
    """
    return choice, choice
|
| 75 |
+
|
| 76 |
+
def transcribe_audio_file(uploaded_file, transcript_file_name, transcription_method):
    """Transcribe an uploaded audio file with the selected method.

    Returns ``(transcript file name, transcription method, status message)``.
    When no file was uploaded, the transcript name is passed through unchanged.
    """
    if uploaded_file:
        transcript_file_name = transcribe_podcast_from_mp3(uploaded_file.name, transcription_method)
        outcome = "Upload Success"
    else:
        outcome = "No File Detected, Failure"
    return transcript_file_name, transcription_method, outcome
|
| 83 |
|
| 84 |
+
def download_and_transcribe_podcast(mp3_url, transcript_file, transcription_method):
    """Transcribe the podcast at ``mp3_url`` with the selected method.

    Returns ``(transcript file, transcription method, status message)``.
    When the URL is empty, the transcript file is passed through unchanged.
    """
    if mp3_url:
        transcript_file = transcribe_podcast(mp3_url, transcription_method)
        outcome = "Upload Success"
    else:
        outcome = "No URL detected, Failure"
    return transcript_file, transcription_method, outcome
|
|
|
|
| 91 |
|
| 92 |
summarization_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
| 93 |
question_answer_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
| 94 |
sentiment_analysis_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
| 95 |
summarize_method_choices = [MAPREDUCE, STUFF]
|
| 96 |
+
transcription_method_choices = [WAV2VEC, AUTOMODELFORSPEECH]
|
| 97 |
|
| 98 |
with gr.Blocks() as demo:
|
| 99 |
transcript_file = gr.State()
|
|
|
|
| 101 |
question_answer_llm_choice = gr.State()
|
| 102 |
sentiment_analysis_llm_choice = gr.State()
|
| 103 |
summarization_llm_choice = gr.State()
|
| 104 |
+
transcription_method = gr.State()
|
| 105 |
+
|
| 106 |
+
# Selector for the speech-to-text backend; the choice is echoed into the
# textbox and stored in the transcription_method state.
with gr.Group("Transcription Model Selection"):
    with gr.Row():
        choice = gr.Radio(label="Transcription Model", choices=transcription_method_choices)
        output = gr.Textbox(label="")
        choice.change(setup_transcription_method, inputs=[choice, transcription_method], outputs=[output, transcription_method])
|
| 111 |
with gr.Row():
|
| 112 |
# Transcribe a podcast from a remote MP3 URL; the handler returns the
# transcript, the method, and a status message in that order.
with gr.Group("Enter Podcast mp3 URL"):
    mp3_url = gr.Textbox(label="Podcast MP3 URL")
    submit_button = gr.Button("Transcribe")
    status = gr.Textbox(label="", value="Pending Transcribe")
    submit_button.click(download_and_transcribe_podcast, inputs=[mp3_url, transcript_file, transcription_method], outputs=[transcript_file, transcription_method, status])
|
| 117 |
# Transcribe a podcast from an uploaded MP3 file; the handler returns the
# transcript, the method, and a status message in that order.
with gr.Group("Upload Podcast mp3 File"):
    mp3_file = gr.File(label="Podcast mp3 file")
    submit_button = gr.Button(value="Transcribe")
    status = gr.Textbox(label="", value="Pending Transcribe")
    submit_button.click(
        fn=transcribe_audio_file,
        inputs=[mp3_file, transcript_file, transcription_method],
        outputs=[transcript_file, transcription_method, status],
    )
|
| 122 |
+
# Use an existing transcript instead of transcribing audio.
with gr.Group("Upload RTF File"):
    rtf_file = gr.File(label="Transcripted RTF file")
    submit_button = gr.Button("Upload RTF")
    # A separate name keeps the earlier `status` textbox binding intact.
    rtf_status = gr.Textbox(label="", value="Pending Upload")
    # setup_transcript_file_handle takes (file, transcript name, status) and
    # returns (status, transcript name); inputs and outputs must match that
    # count and order, otherwise the transcript state gets the wrong value.
    submit_button.click(setup_transcript_file_handle, inputs=[rtf_file, transcript_file, rtf_status], outputs=[rtf_status, transcript_file])
|
| 126 |
with gr.Group("LLM Selection"):
|
| 127 |
with gr.Row():
|
| 128 |
choice = gr.Radio(label="Summarization LLM", choices=summarization_llm_choices)
|
tools/transcribe.py
CHANGED
|
@@ -4,6 +4,9 @@ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
|
| 4 |
import requests
|
| 5 |
import uuid
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
class Audio_to_Text:
|
| 8 |
def __init__(self):
|
| 9 |
self.model_id = "openai/whisper-large-v3"
|
|
@@ -34,19 +37,17 @@ class Audio_to_Text:
|
|
| 34 |
file.write(response.content)
|
| 35 |
print("MP3 file downloaded and saved successfully.")
|
| 36 |
|
| 37 |
-
def convert_audio_to_text(self, audio_file):
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
print("Transcript saved successfully.")
|
| 47 |
-
return save_file_name
|
| 48 |
|
| 49 |
-
def convert_audio_to_text_from_url(self, url):
|
| 50 |
#get uuid for the audio file
|
| 51 |
uuid_audio = str(uuid.uuid4())
|
| 52 |
save_path = f"audio-{uuid_audio}.mp3"
|
|
@@ -56,34 +57,32 @@ class Audio_to_Text:
|
|
| 56 |
|
| 57 |
return path_text_file_of_audio
|
| 58 |
|
| 59 |
-
def
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
|
| 66 |
-
def transcribe_podcast(file_url):
|
| 67 |
-
# Example usage:
|
| 68 |
-
# url = "https://chrt.fm/track/138C95/prfx.byspotify.com/e/play.podtrac.com/npr-510310/traffic.megaphone.fm/NPR7010771664.mp3"
|
| 69 |
-
|
| 70 |
|
|
|
|
| 71 |
audio_to_text = Audio_to_Text()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
# Convert the audio file to text
|
| 75 |
-
|
| 76 |
-
path_text_file_of_audio = audio_to_text.convert_audio_to_text_from_url(file_url)
|
| 77 |
|
| 78 |
# Print the result
|
| 79 |
print(path_text_file_of_audio)
|
| 80 |
-
return path_text_file_of_audio
|
| 81 |
-
|
| 82 |
-
def transcribe_audio_to_text(speech):
|
| 83 |
-
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
| 84 |
-
text = asr(speech)["text"]
|
| 85 |
-
return text
|
| 86 |
-
|
| 87 |
-
# def text_to_sentiment(text):
|
| 88 |
-
# classifier = pipeline("text-classification")
|
| 89 |
-
# return classifier(text)[0]["label"]
|
|
|
|
| 4 |
import requests
|
| 5 |
import uuid
|
| 6 |
|
| 7 |
+
WAV2VEC = "wav2vec"
|
| 8 |
+
AUTOMODELFORSPEECH = "automodelforspeech"
|
| 9 |
+
|
| 10 |
class Audio_to_Text:
|
| 11 |
def __init__(self):
|
| 12 |
self.model_id = "openai/whisper-large-v3"
|
|
|
|
| 37 |
file.write(response.content)
|
| 38 |
print("MP3 file downloaded and saved successfully.")
|
| 39 |
|
| 40 |
+
def convert_audio_to_text(self, audio_file, transcription_method):
    """Transcribe ``audio_file`` and save the transcript to a text file.

    Args:
        audio_file: Audio input handed to the selected speech-recognition
            pipeline.
        transcription_method: ``WAV2VEC`` selects the wav2vec path; any other
            value (including ``None``) uses ``self.pipe``.

    Returns:
        Name of the text file the transcript was written to.
    """
    if transcription_method == WAV2VEC:
        return self.transcribe_audio_to_text_using_wav2vec(audio_file)

    transformers.logging.set_verbosity_info()
    result = self.pipe(audio_file, generate_kwargs={"language": "english"})
    print("Converted audio to text successfully.")
    # save_transcribed_text_to_file() passes its argument to file.write(),
    # which needs a str. The wav2vec path indexes its pipeline result with
    # ["text"]; do the same here when the result is a mapping.
    # NOTE(review): self.pipe is built outside this view - presumably a
    # transformers ASR pipeline returning {"text": ...}; confirm.
    text = result["text"] if isinstance(result, dict) else result
    return self.save_transcribed_text_to_file(text)
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
def convert_audio_to_text_from_url(self, url, transcription_method):
|
| 51 |
#get uuid for the audio file
|
| 52 |
uuid_audio = str(uuid.uuid4())
|
| 53 |
save_path = f"audio-{uuid_audio}.mp3"
|
|
|
|
| 57 |
|
| 58 |
return path_text_file_of_audio
|
| 59 |
|
| 60 |
+
def save_transcribed_text_to_file(self, text):
    """Write ``text`` to a new ``transcript-<uuid4>.txt`` file and return its name.

    The name is a relative path, so the file is created in the current
    working directory.
    """
    save_file_name = f"transcript-{uuid.uuid4()}.txt"
    with open(save_file_name, "w") as out:
        out.write(text)
    print("Transcript saved successfully.")
    return save_file_name
|
| 67 |
|
| 68 |
+
def transcribe_audio_to_text_using_wav2vec(self, mp3):
    """Transcribe ``mp3`` with the ``facebook/wav2vec2-base-960h`` pipeline.

    The recognized text is saved through ``save_transcribed_text_to_file`` and
    the resulting file name is returned. A new pipeline is built on every call.
    """
    recognizer = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
    transcript = recognizer(mp3)["text"]
    return self.save_transcribed_text_to_file(transcript)
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
+
def transcribe_podcast_from_mp3(mp3_file, transcription_method):
    """Transcribe a local audio file and return the transcript file name.

    Creates a fresh ``Audio_to_Text`` and delegates to its
    ``convert_audio_to_text`` with the given method.
    """
    return Audio_to_Text().convert_audio_to_text(mp3_file, transcription_method)
|
| 77 |
+
|
| 78 |
+
def transcribe_podcast(file_url, transcription_method):
    """Transcribe the audio at ``file_url`` and return the transcript path.

    Delegates to ``Audio_to_Text.convert_audio_to_text_from_url`` and prints
    the resulting path before returning it.
    """
    # Example usage:
    # url = "https://chrt.fm/track/138C95/prfx.byspotify.com/e/play.podtrac.com/npr-510310/traffic.megaphone.fm/NPR7010771664.mp3"
    converter = Audio_to_Text()
    transcript_path = converter.convert_audio_to_text_from_url(file_url, transcription_method)
    print(transcript_path)
    return transcript_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|