VeuReu committed on
Commit
39e9f1d
·
verified ·
1 Parent(s): 2c4ca6c

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -2
  2. audio_tools.py +32 -11
  3. requirements.txt +3 -1
Dockerfile CHANGED
@@ -2,7 +2,7 @@ FROM python:3.11-slim
2
 
3
  # Dependencias del sistema necesarias para vídeo/ocr (ajusta si no las usas)
4
  RUN apt-get update && apt-get install -y --no-install-recommends \
5
- ffmpeg libsm6 libxext6 libgl1 tesseract-ocr \
6
  && rm -rf /var/lib/apt/lists/*
7
 
8
  WORKDIR /app
@@ -13,4 +13,4 @@ COPY . /app
13
 
14
  # HF Spaces expone PORT
15
  ENV PORT=7860
16
- CMD ["uvicorn", "main_api:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
 
2
 
3
  # Dependencias del sistema necesarias para vídeo/ocr (ajusta si no las usas)
4
  RUN apt-get update && apt-get install -y --no-install-recommends \
5
+ ffmpeg libsm6 libxext6 libgl1 tesseract-ocr libsndfile1 \
6
  && rm -rf /var/lib/apt/lists/*
7
 
8
  WORKDIR /app
 
13
 
14
  # HF Spaces expone PORT
15
  ENV PORT=7860
16
+ CMD ["uvicorn", "main_api:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
audio_tools.py CHANGED
@@ -15,20 +15,27 @@
15
  # -----------------------------------------------------------------------------
16
  from __future__ import annotations
17
 
18
- from dataclasses import dataclass
19
- from pathlib import Path
20
- from typing import Any, Dict, List, Optional, Tuple
21
-
22
  import json
23
  import logging
24
  import math
25
  import os
26
  import shlex
27
  import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- import numpy as np
30
  import torch
31
- import torchaudio
32
  import torchaudio.transforms as T
33
  from pydub import AudioSegment
34
  from pyannote.audio import Pipeline
@@ -49,6 +56,25 @@ log.setLevel(logging.INFO)
49
 
50
  # ------------------------------- Utilities -----------------------------------
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def _pick_device_auto(dev_cfg: str) -> str:
53
  """Resolve 'auto' device to cuda/cpu."""
54
  if dev_cfg == "auto":
@@ -279,11 +305,6 @@ def _build_asr_backend_for_language(lang_iso: str, cfg: Dict[str, Any]):
279
  )
280
 
281
  # -------------------------------- Diarization --------------------------------
282
- from pathlib import Path
283
- from typing import List, Dict, Any, Tuple
284
- from pydub import AudioSegment
285
- from pyannote.audio import Pipeline
286
- import math
287
 
288
  def diarize_audio(
289
  wav_path: str,
 
15
  # -----------------------------------------------------------------------------
16
  from __future__ import annotations
17
 
18
+ import numpy as np
 
 
 
19
  import json
20
  import logging
21
  import math
22
  import os
23
  import shlex
24
  import subprocess
25
+ from pathlib import Path
26
+ from typing import List, Dict, Any, Tuple, Optional
27
+ from dataclasses import dataclass
28
+
29
+ # al principio de audio_tools.py
30
+ try:
31
+ import torchaudio as ta
32
+ HAS_TORCHAUDIO = True
33
+ except ImportError:
34
+ ta = None
35
+ HAS_TORCHAUDIO = False
36
+ import soundfile as sf
37
 
 
38
  import torch
 
39
  import torchaudio.transforms as T
40
  from pydub import AudioSegment
41
  from pyannote.audio import Pipeline
 
56
 
57
  # ------------------------------- Utilities -----------------------------------
58
 
59
def load_wav(path, sr=16000):
    """Load an audio file as a mono float32 waveform at the requested rate.

    Uses torchaudio when it was importable at module load time
    (``HAS_TORCHAUDIO``); otherwise falls back to soundfile for decoding
    and librosa for resampling.

    Multi-channel input is downmixed to mono by averaging the channels
    *before* resampling, so both backends return the same 1-D layout.
    Previously the torchaudio path returned ``(channels, frames)`` for
    stereo files, while the fallback returned ``(frames, channels)`` and
    then resampled along the channel axis.

    Args:
        path: Location of the audio file to read.
        sr: Target sample rate in Hz. The audio is resampled only when the
            file's native rate differs from this value.

    Returns:
        A ``(samples, sr)`` tuple where ``samples`` is a 1-D NumPy array
        and ``sr`` is the target sample rate that was requested.
    """
    if HAS_TORCHAUDIO:
        # torchaudio.load yields a (channels, frames) tensor by default.
        wav, in_sr = ta.load(path)
        if wav.dim() > 1 and wav.size(0) > 1:
            # Downmix so that squeeze(0) below really produces a 1-D array.
            wav = wav.mean(dim=0, keepdim=True)
        if in_sr != sr:
            wav = ta.functional.resample(wav, in_sr, sr)
        return wav.squeeze(0).numpy(), sr

    # Fallback: decode with soundfile, resample with librosa.
    import librosa

    y, in_sr = sf.read(path, dtype="float32", always_2d=False)
    if y.ndim > 1:
        # soundfile returns (frames, channels); average the channel axis so
        # librosa resamples along time rather than across channels.
        y = y.mean(axis=1)
    if in_sr != sr:
        y = librosa.resample(y, orig_sr=in_sr, target_sr=sr)
    return y.astype(np.float32), sr
71
+
72
def save_wav(path, y, sr=16000):
    """Write waveform ``y`` to ``path`` at sample rate ``sr``.

    When torchaudio is unavailable the array is handed to soundfile
    unchanged. Otherwise it is wrapped in a tensor with a new leading
    dimension and saved through torchaudio.

    Args:
        path: Destination file path.
        y: NumPy array of audio samples (presumably 1-D mono, given the
            leading dimension added for torchaudio; verify against callers).
        sr: Sample rate in Hz to record in the output file.
    """
    if not HAS_TORCHAUDIO:
        sf.write(path, y, sr)
        return

    # Add a leading dimension before handing the samples to torchaudio.
    tensor = torch.from_numpy(y).unsqueeze(0)
    ta.save(path, tensor, sr)
77
+
78
  def _pick_device_auto(dev_cfg: str) -> str:
79
  """Resolve 'auto' device to cuda/cpu."""
80
  if dev_cfg == "auto":
 
305
  )
306
 
307
  # -------------------------------- Diarization --------------------------------
 
 
 
 
 
308
 
309
  def diarize_audio(
310
  wav_path: str,
requirements.txt CHANGED
@@ -17,8 +17,10 @@ ffmpeg-python>=0.2
17
  scikit-learn>=1.5
18
  sentence-transformers>=3.0
19
  transformers>=4.44
20
- torch>=2.3,<3
 
21
 
22
  chromadb>=0.5.4
23
  moviepy>=2.0
24
  tenacity>=8.2
 
 
17
  scikit-learn>=1.5
18
  sentence-transformers>=3.0
19
  transformers>=4.44
20
+ torch==2.3.0
21
+ torchaudio==2.3.0
22
 
23
  chromadb>=0.5.4
24
  moviepy>=2.0
25
  tenacity>=8.2
26
+