Spaces:

Midnightar
/

voice-gender-detection

Sleeping

App Files Files Community

Midnightar committed on Sep 25

Commit

0209771

verified ·

1 Parent(s): acc7904

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -16

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import subprocess
 from pathlib import Path
 import torch
-# Limit PyTorch threads to reduce memory/CPU pressure on small containers
 torch.set_num_threads(1)
 import torchaudio
@@ -25,15 +24,22 @@ TARGET_SR = 16000  # wav2vec2 expects 16 kHz
 def get_model():
     """
     Lazily load processor and model on first call and cache them globally.
-    Call inside request handlers to avoid heavy startup on cold starts.
     """
     global processor, model
     if processor is None or model is None:
-        print("🔁 Loading HF processor & model (this may take 10-60s on first request)...")
         from transformers import Wav2Vec2Processor, AutoModelForAudioClassification
-        processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
         model = AutoModelForAudioClassification.from_pretrained(
-            "prithivMLmods/Common-Voice-Gender-Detection"
         )
         model.eval()
         print("✅ Model & processor loaded.")
@@ -95,10 +101,9 @@ async def predict(file: UploadFile = File(...)):
             try:
                 waveform_np, sr = sf.read(tmp_path, dtype="float32")
             except Exception as e:
-                # If soundfile fails (some mp3/ogg), try using ffmpeg to convert to WAV then read
                 print("⚠️ soundfile could not read directly, trying ffmpeg conversion:", e)
                 converted = tmp_path + ".converted.wav"
-                # Use ffmpeg CLI (ffmpeg must be installed in the container)
                 ffmpeg_cmd = [
                     "ffmpeg", "-y", "-i", tmp_path,
                     "-ar", str(TARGET_SR), "-ac", "1", converted
@@ -111,27 +116,21 @@ async def predict(file: UploadFile = File(...)):
                     pass
         finally:
-            # remove uploaded tmp file as soon as possible
             try:
                 os.unlink(tmp_path)
             except Exception:
                 pass
-        # waveform_np shape: (n_samples,) or (n_samples, channels)
         if waveform_np.ndim > 1:
-            # average channels to mono
             waveform_np = waveform_np.mean(axis=1)
-        # Convert to torch tensor shape [1, n_samples]
         waveform = torch.tensor(waveform_np, dtype=torch.float32).unsqueeze(0)
-        # Resample if necessary
         if sr != TARGET_SR:
             resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=TARGET_SR)
             waveform = resampler(waveform)
             sr = TARGET_SR
-        # Prepare inputs for HF model
         inputs = proc(
             waveform.squeeze().numpy(),
             sampling_rate=sr,
@@ -153,12 +152,11 @@ async def predict(file: UploadFile = File(...)):
         import traceback
         print("🔥 Error in /predict:", e)
         traceback.print_exc()
-        # Return the error string (400) so client can see the reason
         return JSONResponse(status_code=400, content={"error": str(e)})
 if __name__ == "__main__":
-    # Local dev fallback (Railway/Gunicorn uses CMD from Dockerfile)
     import uvicorn
     port = int(os.environ.get("PORT", 8000))
-    uvicorn.run(app, host="0.0.0.0", port=port)

 from pathlib import Path
 import torch
 torch.set_num_threads(1)
 import torchaudio
 def get_model():
     """
     Lazily load processor and model on first call and cache them globally.
+    Uses a custom HF cache dir to avoid permission issues on Hugging Face Spaces.
     """
     global processor, model
     if processor is None or model is None:
+        print("🔁 Loading HF processor & model (this may take 10–60s on first request)...")
         from transformers import Wav2Vec2Processor, AutoModelForAudioClassification
+        cache_dir = os.getenv("HF_HOME", "/app/hf_cache")
+        processor = Wav2Vec2Processor.from_pretrained(
+            "facebook/wav2vec2-base-960h",
+            cache_dir=cache_dir
+        )
         model = AutoModelForAudioClassification.from_pretrained(
+            "prithivMLmods/Common-Voice-Gender-Detection",
+            cache_dir=cache_dir
         )
         model.eval()
         print("✅ Model & processor loaded.")
             try:
                 waveform_np, sr = sf.read(tmp_path, dtype="float32")
             except Exception as e:
+                # If soundfile fails, convert with ffmpeg then read
                 print("⚠️ soundfile could not read directly, trying ffmpeg conversion:", e)
                 converted = tmp_path + ".converted.wav"
                 ffmpeg_cmd = [
                     "ffmpeg", "-y", "-i", tmp_path,
                     "-ar", str(TARGET_SR), "-ac", "1", converted
                     pass
         finally:
             try:
                 os.unlink(tmp_path)
             except Exception:
                 pass
         if waveform_np.ndim > 1:
             waveform_np = waveform_np.mean(axis=1)
         waveform = torch.tensor(waveform_np, dtype=torch.float32).unsqueeze(0)
         if sr != TARGET_SR:
             resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=TARGET_SR)
             waveform = resampler(waveform)
             sr = TARGET_SR
         inputs = proc(
             waveform.squeeze().numpy(),
             sampling_rate=sr,
         import traceback
         print("🔥 Error in /predict:", e)
         traceback.print_exc()
         return JSONResponse(status_code=400, content={"error": str(e)})
 if __name__ == "__main__":
     import uvicorn
     port = int(os.environ.get("PORT", 8000))
+    print(f"🚀 Starting app on port {port}")
+    uvicorn.run(app, host="0.0.0.0", port=port)