Mariame16 committed on
Commit
e2da8d7
·
1 Parent(s): fab623d

feat: déploiement API Dida

Browse files
Files changed (3) hide show
  1. Dockerfile +12 -0
  2. main.py +75 -0
  3. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11-slim

# Send print() output straight to the container logs instead of letting it
# sit in a block buffer; main.py reports model-loading progress via print().
ENV PYTHONUNBUFFERED=1

# Unprivileged account used to run the server. UID 1000 matches what
# Hugging Face Docker Spaces expect (see the Spaces Docker documentation).
RUN useradd --create-home --uid 1000 user

WORKDIR /app

# Install dependencies first so this layer is cached while main.py changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY main.py .

# Drop root privileges for runtime; main.py points the model cache at /tmp,
# which stays writable for this user.
USER user

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
3
+ from fastapi import FastAPI, UploadFile
4
+ from fastapi.responses import JSONResponse
5
+ from transformers import (
6
+ VitsModel, AutoTokenizer,
7
+ Wav2Vec2ForCTC, AutoProcessor,
8
+ NllbTokenizer, AutoModelForSeq2SeqLM
9
+ )
10
+ import torch, scipy, base64, numpy as np
11
+ import soundfile as sf
12
+ from io import BytesIO
13
+
app = FastAPI()

# ─── Model loading ───
# Checkpoint identifiers are named once so each model/tokenizer pair is
# guaranteed to load from the same repository.
TTS_CHECKPOINT = "facebook/mms-tts-gud"
ASR_CHECKPOINT = "facebook/mms-1b-all"
ASR_LANGUAGE = "gud"
NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"

# Text-to-speech model and its tokenizer.
print("Chargement TTS Dida Yocoboué...")
tts_model = VitsModel.from_pretrained(TTS_CHECKPOINT)
tts_tokenizer = AutoTokenizer.from_pretrained(TTS_CHECKPOINT)

# Speech recognition: load the shared checkpoint, then switch both the
# tokenizer vocabulary and the model adapter to the target language.
print("Chargement ASR...")
asr_processor = AutoProcessor.from_pretrained(ASR_CHECKPOINT)
asr_model = Wav2Vec2ForCTC.from_pretrained(ASR_CHECKPOINT)
asr_processor.tokenizer.set_target_lang(ASR_LANGUAGE)
asr_model.load_adapter(ASR_LANGUAGE)

# Translation model and its tokenizer.
print("Chargement Traduction NLLB...")
nllb_tokenizer = NllbTokenizer.from_pretrained(NLLB_CHECKPOINT)
nllb_model = AutoModelForSeq2SeqLM.from_pretrained(NLLB_CHECKPOINT)
30
+
# ─── TTS ───
@app.post("/api/tts")
async def text_to_speech(payload: dict):
    """Synthesize speech for the text in the JSON payload.

    Args:
        payload: Request body; must contain a non-empty string under "text".

    Returns:
        A dict with "audio_base64" (a base64-encoded WAV file) and
        "sample_rate" (the sampling rate taken from the TTS model config).

    Raises:
        HTTPException: 400 when "text" is missing, not a string, or blank.
    """
    from fastapi import HTTPException
    # Explicit submodule import so the call below does not depend on
    # `import scipy` alone exposing `scipy.io.wavfile`.
    from scipy.io import wavfile

    text = payload.get("text")
    if not isinstance(text, str) or not text.strip():
        # Reject bad input up front instead of failing with a 500 later.
        raise HTTPException(
            status_code=400,
            detail="Field 'text' must be a non-empty string.",
        )

    inputs = tts_tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform.squeeze()

    sample_rate = tts_model.config.sampling_rate
    buffer = BytesIO()
    wavfile.write(buffer, rate=sample_rate, data=waveform.numpy())
    audio_b64 = base64.b64encode(buffer.getvalue()).decode()
    return {"audio_base64": audio_b64, "sample_rate": sample_rate}
42
+
# ─── ASR ───
@app.post("/api/asr")
async def speech_to_text(file: UploadFile):
    """Transcribe an uploaded audio file.

    The audio is decoded with soundfile, down-mixed to mono, resampled to
    16 kHz when needed, and decoded greedily (argmax over the CTC logits).

    Args:
        file: Uploaded audio file in a format soundfile can read.

    Returns:
        A dict with the decoded text under "transcription".

    Raises:
        HTTPException: 400 when the upload is empty, cannot be decoded, or
            contains no audio frames.
    """
    from fastapi import HTTPException

    target_sr = 16000

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    try:
        audio_array, sr = sf.read(BytesIO(audio_bytes))
    except RuntimeError as err:
        # soundfile's decode errors derive from RuntimeError.
        raise HTTPException(
            status_code=400,
            detail="Could not decode the uploaded audio file.",
        ) from err

    # Multi-channel audio comes back as (frames, channels). Average the
    # channels so resampling and the processor both see a 1-D mono signal.
    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1)
    if audio_array.size == 0:
        raise HTTPException(status_code=400, detail="Uploaded audio contains no samples.")

    if sr != target_sr:
        import librosa
        audio_array = librosa.resample(audio_array, orig_sr=sr, target_sr=target_sr)

    inputs = asr_processor(audio_array, sampling_rate=target_sr, return_tensors="pt")
    with torch.no_grad():
        logits = asr_model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = asr_processor.batch_decode(predicted_ids)[0]
    return {"transcription": transcription}
57
+
# ─── Translation ───
@app.post("/api/translate")
async def translate(payload: dict):
    """Translate text between two NLLB language codes.

    Args:
        payload: Request body with:
            "text": non-empty string to translate (required);
            "source_lang": NLLB code of the input, default "fra_Latn";
            "target_lang": NLLB code of the output, default "fra_Latn".

    Returns:
        A dict with the translated text under "translation".

    Raises:
        HTTPException: 400 when "text" is missing/blank or when a language
            code is not one known to the tokenizer.
    """
    from fastapi import HTTPException

    text = payload.get("text")
    if not isinstance(text, str) or not text.strip():
        raise HTTPException(
            status_code=400,
            detail="Field 'text' must be a non-empty string.",
        )

    # NOTE(review): both defaults are "fra_Latn", so a request that omits both
    # codes asks for French -> French. Kept for compatibility; confirm the
    # intended default target language.
    source_lang = payload.get("source_lang", "fra_Latn")
    target_lang = payload.get("target_lang", "fra_Latn")

    # The tokenizer registers its language codes as additional special tokens;
    # reject anything else rather than silently translating with a bad code.
    known_codes = set(nllb_tokenizer.additional_special_tokens)
    for field, code in (("source_lang", source_lang), ("target_lang", target_lang)):
        if code not in known_codes:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported language code for '{field}': {code!r}",
            )

    # The source language is tokenizer state, not a call-time argument:
    # it has to be set before encoding for the right language tag to be used.
    nllb_tokenizer.src_lang = source_lang
    inputs = nllb_tokenizer(text, return_tensors="pt")

    translated = nllb_model.generate(
        **inputs,
        # Force the decoder to start with the target-language token.
        forced_bos_token_id=nllb_tokenizer.convert_tokens_to_ids(target_lang),
    )
    result = nllb_tokenizer.decode(translated[0], skip_special_tokens=True)
    return {"translation": result}
71
+
# ─── Health check ───
@app.get("/")
async def root():
    """Report that the service is up; returns a fixed status payload."""
    health = {"status": "ok", "message": "API Dida opérationnelle !"}
    return health
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
fastapi==0.111.0
uvicorn==0.30.0
transformers==4.41.0
torch==2.11.0
scipy==1.13.0
soundfile==0.12.1
librosa==0.10.2
numpy==1.26.4
python-multipart==0.0.9
accelerate==0.30.0
# Required by the slow (SentencePiece-based) NllbTokenizer that main.py loads.
sentencepiece==0.2.0