Spaces:
Build error
Build error
| ''' | |
| import torch | |
| import torchaudio | |
| from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor | |
| import speech_recognition as sr | |
| import io | |
| from pydub import AudioSegment | |
| import librosa | |
| import whisper | |
| from scipy.io import wavfile | |
| from test import record_voice | |
| model = Wav2Vec2ForCTC.from_pretrained(r'yongjian/wav2vec2-large-a') # Note: PyTorch Model | |
| tokenizer = Wav2Vec2Processor.from_pretrained(r'yongjian/wav2vec2-large-a') | |
| r = sr.Recognizer() | |
| from transformers import pipeline | |
| summarizer = pipeline("summarization", model="facebook/bart-large-cnn") | |
| with sr.Microphone(sample_rate=16000) as source: | |
| print("You can start speaking now") | |
| record_voice() | |
| x,_ = librosa.load("output.wav") | |
| model_inputs = tokenizer(x, sampling_rate=16000, return_tensors="pt", padding=True) | |
| logits = model(model_inputs.input_values, attention_mask=model_inputs.attention_mask).logits.cuda() # use .cuda() for GPU acceleration | |
| pred_ids = torch.argmax(logits, dim=-1).cpu() | |
| pred_text = tokenizer.batch_decode(pred_ids) | |
| print(x[:10],x.shape) | |
| print('Transcription:', pred_text) | |
| model = whisper.load_model("base") | |
| result = model.transcribe("output.wav") | |
| print(result["text"]) | |
| summary_input = result["text"] | |
| summary_output = (summarizer(summary_input, max_length=30, min_length=20, do_sample=False)) | |
| print(summary_output) | |
| with open("raw_text.txt",'w',encoding = 'utf-8') as f: | |
| f.write(summary_input) | |
| f.close() | |
| with open("summary_text.txt",'w',encoding = 'utf-8') as f: | |
| f.write(summary_output[0]["summary_text"]) | |
| f.close() | |
| ''' |