|
|
import os

from flask import Flask, request, jsonify
from transformers import pipeline
import torch
|
|
|
|
|
|
|
|
# WSGI application object; the route decorators in this module register on it.
app = Flask(__name__)
|
|
|
|
|
def init_transcription_pipeline(
    model_path: str = "c:/Users/vhits/Documents/Speect2Text/model/whisper-gujarati-medium",
    language: str = "gu",
):
    """Build the automatic-speech-recognition pipeline used by the service.

    Args:
        model_path: Model directory or identifier handed to ``pipeline``.
            Defaults to the location that used to be hard-coded here, so
            existing zero-argument calls behave exactly as before.
        language: Language code used when building the forced decoder
            prompt. Defaults to ``"gu"``.

    Returns:
        The configured ``transformers`` pipeline object.
    """
    # Use the first CUDA device when one is available, otherwise the CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    asr = pipeline(
        task="automatic-speech-recognition",
        model=model_path,
        chunk_length_s=30,
        device=device,
    )

    # Pin decoding to the requested language and to the "transcribe" task.
    # NOTE(review): newer transformers releases prefer passing
    # generate_kwargs={"language": ..., "task": ...} instead of mutating
    # model.config -- confirm against the installed version.
    asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
        language=language, task="transcribe"
    )

    return asr
|
|
|
|
|
# Build the pipeline once at import time so every request reuses the same
# loaded model instead of reloading it per call.
transcribe_pipeline = init_transcription_pipeline()
|
|
|
|
|
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio file and return the text as JSON.

    Expects a POST whose uploaded files include an ``audio_file`` field.

    Returns:
        * ``{"transcription": <text>}`` (HTTP 200) on success.
        * ``{"error": ...}`` with HTTP 400 when the file part is missing,
          no file was selected, or the upload is empty.
        * ``{"error": ...}`` with HTTP 500 when the pipeline raises.
    """
    if 'audio_file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    uploaded = request.files['audio_file']
    if uploaded.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # The ASR pipeline accepts a path/URL string, raw bytes, an ndarray or a
    # dict -- not the upload wrapper object itself -- so hand it the raw file
    # content. (Per the transformers docs, bytes input is decoded via ffmpeg.)
    audio_bytes = uploaded.read()
    if not audio_bytes:
        return jsonify({"error": "Empty file"}), 400

    try:
        transcription_result = transcribe_pipeline(audio_bytes)["text"]
    except Exception:
        # Request boundary: log the traceback and answer with a JSON error
        # rather than letting the exception escape the view.
        app.logger.exception("Transcription failed")
        return jsonify({"error": "An error occurred during transcription"}), 500

    return jsonify({"transcription": transcription_result})
|
|
|
|
|
if __name__ == '__main__':
    # Debug mode enables Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless it
    # is explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled)
|
|
|