Spaces:
Sleeping
Sleeping
| import subprocess | |
| import speech_recognition as sr | |
| from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC, AutoModelForCTC | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| from utils import WHITESPACE_HANDLER | |
| from transformers import pipeline | |
| from settings import settings | |
| from transformers import AutoProcessor, AutoModelForCTC | |
| import torchaudio | |
| import requests | |
async def create_wav(audio_file):
    """Convert *audio_file* to WAV with ffmpeg and return the new path.

    The output path is the input path with its final extension swapped for
    ``.wav`` (``clip.mp3`` -> ``clip.wav``); a path with no extension simply
    gets ``.wav`` appended.

    Args:
        audio_file: Path of the source audio file, as a string.

    Returns:
        The path of the WAV file ffmpeg was asked to write.
    """
    import os

    # splitext touches only the trailing extension. The previous
    # str.replace() rewrote every occurrence of the extension text in the
    # path and kept the original dot, yielding names such as "clip..wav".
    stem, _ext = os.path.splitext(audio_file)
    wav_audio_path = stem + '.wav'
    # NOTE: ffmpeg's exit status is not checked here, so a failed conversion
    # surfaces later when the caller tries to open the returned path.
    subprocess.run(['ffmpeg', '-i', audio_file, wav_audio_path])
    return wav_audio_path
async def speech2text(audio_file):
    """Transcribe an audio file through the configured HTTP speech service.

    Inputs whose path does not end in ``.wav`` are first converted with
    ``create_wav``. The WAV file is then uploaded as the multipart field
    ``file`` to ``settings.URL``, with ``settings.API`` sent as the
    ``Authorization`` header.

    Args:
        audio_file: Path of the audio file to transcribe, as a string.

    Returns:
        The value found at ``['result']['text']`` in the JSON response.
    """
    if not audio_file.endswith(".wav"):
        # create_wav needs the source path; calling it with no argument
        # raised TypeError for every non-WAV input.
        audio_file = await create_wav(audio_file)

    # NOTE: an earlier local path using speech_recognition's
    # recognize_google(audio, language=settings.LANGUAGE) was replaced by
    # the HTTP request below.
    url = settings.URL
    headers = {'Authorization': settings.API}
    # Open the upload inside a context manager so the handle is closed once
    # the request has been sent, even if the request raises.
    with open(audio_file, 'rb') as audio_stream:
        files = {'file': (audio_file, audio_stream)}
        response = requests.post(url, headers=headers, files=files)
    aligned_transcript = response.json()['result']["text"]
    return aligned_transcript
async def summerizer(aligned_transcript):
    """Produce a summary of *aligned_transcript* with a seq2seq model.

    The tokenizer and model named by ``settings.SUMMARIZER_MODEL`` are loaded
    on each call. The transcript is passed through ``WHITESPACE_HANDLER``,
    tokenized to exactly 512 tokens (padded or truncated), and summarized
    with 4-beam search capped at 84 tokens.

    Args:
        aligned_transcript: The text to summarize.

    Returns:
        The decoded summary string with special tokens removed.
    """
    checkpoint = settings.SUMMARIZER_MODEL
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=False)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # Tokenize a single-item batch and keep only the token ids.
    cleaned_text = WHITESPACE_HANDLER(aligned_transcript)
    encoded = tokenizer(
        [cleaned_text],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    token_ids = encoded["input_ids"]

    generated = seq2seq.generate(
        input_ids=token_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )
    # The batch holds one sequence; decode that first (only) result.
    best_sequence = generated[0]
    return tokenizer.decode(
        best_sequence,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
async def STT_with_Summary(audio_file):
    """Transcribe *audio_file*, then summarize the resulting transcript.

    Args:
        audio_file: Path of the audio file, forwarded to ``speech2text``.

    Returns:
        A ``(transcript, summary)`` tuple.
    """
    transcript = await speech2text(audio_file)
    return transcript, await summerizer(transcript)