Spaces:

PrashantGoyal
/

SmartPrep

Sleeping

App Files Files Community

PrashantGoyal committed on Jan 1

Commit

9430422

1 Parent(s): 2f2d263

Smart-prep

Browse files

Files changed (4) hide show

.gitignore +5 -0
App/app.py +77 -0
DockerFile +27 -0
requirements.txt +21 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+# Virtual environment directory (the leading slash anchors the pattern to the repository root)
+/venv
+# Python bytecode cache directory at the repository root, plus compiled files anywhere
+/__pycache__
+*.pyc
+# Local environment file -- presumably loaded via python-dotenv; confirm
+.env
+# macOS Finder metadata
+.DS_Store

App/app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+from fastapi import FastAPI, UploadFile
+from fastapi.responses import StreamingResponse
+from faster_whisper import WhisperModel
+import torch
+from transformers import pipeline
+import tempfile
+from scipy.io.wavfile import write as wav_write
+import io
+from io import BytesIO
+import numpy as np
+import uvicorn
+app = FastAPI()
+print("Loading STT model...")
+stt_model = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-small",
+    device="cpu"
+)
+print("Loading TTS model...")
+tts = pipeline(
+    "text-to-speech",
+    model="facebook/mms-tts-eng",
+    device=-1
+)
+@app.post("/stt")
+async def speech_to_text(file: UploadFile):
+    audio_bytes = await file.read()
+    audio, sample_rate = sf.read(io.BytesIO(audio_bytes))
+    if audio.ndim > 1:
+        audio = np.mean(audio, axis=1)
+    result = stt_model({
+        "array": audio,
+        "sampling_rate": sample_rate
+    })
+    return {"text": result["text"]}
+@app.post("/tts")
+async def text_to_speech(payload: dict):
+     text = payload["text"]
+     out = tts(text)
+     audio = out["audio"]
+     sample_rate = int(out["sampling_rate"])
+     audio = np.asarray(audio).squeeze()
+     audio = np.nan_to_num(audio)
+     audio = np.clip(audio, -1.0, 1.0)
+     audio = (audio * 32767).astype(np.int16)
+     buffer = BytesIO()
+     wav_write(buffer, sample_rate, audio)
+     buffer.seek(0)
+     return StreamingResponse(
+        buffer,
+        media_type="audio/wav"
+    )
+@app.get("/")
+def health():
+    return {"status": "ok"}
+if __name__ == "__main__":
+    uvicorn.run("App.app:app", host="0.0.0.0", port=8000, reload=True)

DockerFile ADDED Viewed

	@@ -0,0 +1,27 @@

+FROM python:3.10-slim
+# Send print() output (the model-loading messages) to the container log unbuffered.
+ENV PYTHONUNBUFFERED=1
+WORKDIR /Transformers
+# System build toolchain and shared libraries.
+# NOTE(review): originally labelled as required for pyarrow / datasets, but
+# neither package is listed in requirements.txt -- confirm which are still needed.
+# --no-install-recommends keeps optional packages out of the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    gcc \
+    g++ \
+    cmake \
+    curl \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Copy the requirements first so the dependency layer is cached across code changes.
+COPY requirements.txt .
+# Upgrade packaging tools, then install the dependencies without keeping a pip cache.
+RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && python -m pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Run App/app.py as a module; its __main__ guard starts the uvicorn server.
+CMD ["python", "-m", "App.app"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+torch==2.1.2
+torchaudio==2.1.2
+numpy==1.26.4
+scipy==1.11.4
+transformers==4.36.2
+tokenizers==0.15.0
+huggingface-hub==0.20.1
+safetensors==0.4.1
+openai-whisper==20231117
+# App/app.py imports faster_whisper at module load; without this package the app fails to start.
+faster-whisper
+soundfile
+ffmpeg-python
+fastapi
+uvicorn
+python-multipart
+python-dotenv
+tqdm
+regex
+pyyaml
+requests
+threadpoolctl
+joblib