Spaces:
Running
Running
Yassine
committed on
Commit
·
1602deb
1
Parent(s):
7b036e8
Add audio transcription feature using Whisper model and update Dockerfile
Browse files- Dockerfile +1 -0
- main.py +33 -2
- requirements.txt +2 -0
Dockerfile
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
FROM python:3.9
|
| 2 |
|
|
|
|
| 3 |
RUN useradd -m -u 1000 user
|
| 4 |
USER user
|
| 5 |
ENV PATH="/home/user/.local/bin:$PATH"
|
|
|
|
| 1 |
FROM python:3.9
|
| 2 |
|
| 3 |
+
RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
|
| 4 |
RUN useradd -m -u 1000 user
|
| 5 |
USER user
|
| 6 |
ENV PATH="/home/user/.local/bin:$PATH"
|
main.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
-
from fastapi import FastAPI, Body
|
| 2 |
import torch
|
| 3 |
import spacy
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
| 6 |
from fastapi.middleware.cors import CORSMiddleware
|
| 7 |
-
from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification
|
| 8 |
from pydantic import BaseModel
|
|
|
|
| 9 |
|
| 10 |
# Define input model
|
| 11 |
|
|
@@ -99,6 +100,26 @@ nlp = spacy.load('fr_core_news_lg')
|
|
| 99 |
|
| 100 |
# Set device (CPU or GPU)
|
| 101 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
ner_model = ner_model.to(device)
|
| 103 |
type_model = type_model.to(device)
|
| 104 |
|
|
@@ -200,3 +221,13 @@ async def analyze_text(input_data: TextInput):
|
|
| 200 |
"confidence": confidence,
|
| 201 |
"entities": filtered_entities
|
| 202 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, Body, UploadFile, File
|
| 2 |
import torch
|
| 3 |
import spacy
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
| 6 |
from fastapi.middleware.cors import CORSMiddleware
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
| 8 |
from pydantic import BaseModel
|
| 9 |
+
import tempfile
|
| 10 |
|
| 11 |
# Define input model
|
| 12 |
|
|
|
|
| 100 |
|
| 101 |
# Set device (CPU or GPU)
|
| 102 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 103 |
+
# Half precision is only selected when CUDA is available; otherwise float32.
torch_dtype = torch.float32
if torch.cuda.is_available():
    torch_dtype = torch.float16

# Load Whisper model and processor
model_id = "openai/whisper-large-v3-turbo"
whisper_processor = AutoProcessor.from_pretrained(model_id)
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
whisper_model.to(device)

# Speech-recognition pipeline built from the model and the processor's
# tokenizer / feature extractor, placed on the same device as the model.
whisper_pipe = pipeline(
    task="automatic-speech-recognition",
    model=whisper_model,
    tokenizer=whisper_processor.tokenizer,
    feature_extractor=whisper_processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
|
| 122 |
+
|
| 123 |
ner_model = ner_model.to(device)
|
| 124 |
type_model = type_model.to(device)
|
| 125 |
|
|
|
|
| 221 |
"confidence": confidence,
|
| 222 |
"entities": filtered_entities
|
| 223 |
}
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the Whisper pipeline.

    The upload is written to a temporary file because the pipeline is called
    with a file path. The temporary file is always removed afterwards, also
    when reading the upload or running the pipeline raises, so requests do
    not accumulate files on disk.

    Args:
        file: Uploaded audio file taken from the multipart form field ``file``.

    Returns:
        A dict of the form ``{"transcription": <text>}`` where the text is the
        ``"text"`` entry of the pipeline result.
    """
    tmp_path = None
    try:
        # delete=False: the file must outlive the ``with`` block so that it is
        # closed (and fully flushed) before the pipeline opens it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        result = whisper_pipe(tmp_path)
        return {"transcription": result["text"]}
    finally:
        # Explicit cleanup is required because delete=False disables automatic
        # removal; a file that is already gone is not an error.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                pass
|
requirements.txt
CHANGED
|
@@ -6,3 +6,5 @@ pydantic==2.9.2
|
|
| 6 |
safetensors==0.4.5
|
| 7 |
spacy==3.7.2
|
| 8 |
fr-core-news-lg @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_lg-3.7.0/fr_core_news_lg-3.7.0-py3-none-any.whl
|
|
|
|
|
|
|
|
|
| 6 |
safetensors==0.4.5
|
| 7 |
spacy==3.7.2
|
| 8 |
fr-core-news-lg @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_lg-3.7.0/fr_core_news_lg-3.7.0-py3-none-any.whl
|
| 9 |
+
torchaudio
|
| 10 |
+
datasets[audio]
|