Spaces:

Rivalcoder
/

Speaker

Sleeping

App Files Files Community

Rivalcoder committed on Dec 4, 2025

Commit

e2ce4a5

1 Parent(s): 5b04c03

Add Files

Browse files

Files changed (1) hide show

app.py +22 -20

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import warnings
 import io
 import tempfile
 from pathlib import Path
 warnings.filterwarnings('ignore')
 os.environ['PYTHONWARNINGS'] = 'ignore'
@@ -68,13 +69,24 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Global variables for models
 pipeline = None
 whisper_model = None
 @app.on_event("startup")
 async def load_models():
-    """Load models on startup"""
     global pipeline, whisper_model
     print(f"Using device: {device}")
@@ -90,10 +102,10 @@ async def load_models():
     print("Loading Whisper small model...")
     with SuppressStderr():
         whisper_model = whisper.load_model("small", device=device)
     print("Models loaded successfully!\n")
 def process_audio(audio_path):
-    """Process audio file with diarization and transcription"""
     if not os.path.exists(audio_path):
         raise FileNotFoundError(f"Audio file not found: {audio_path}")
@@ -146,7 +158,6 @@ def process_audio(audio_path):
 @app.get("/")
 async def root():
-    """Root endpoint with API information"""
     return {
         "message": "Speaker Diarization & Transcription API",
         "version": "1.0.0",
@@ -159,7 +170,6 @@ async def root():
 @app.get("/health")
 async def health_check():
-    """Health check endpoint"""
     return {
         "status": "healthy",
         "device": str(device),
@@ -168,19 +178,9 @@ async def health_check():
 @app.post("/process")
 async def process_audio_endpoint(file: UploadFile = File(...)):
-    """
-    Process audio file for speaker diarization and transcription
-    Args:
-        file: Audio file (wav, mp3, etc.)
-    Returns:
-        JSON response with segments and full transcription
-    """
     if pipeline is None or whisper_model is None:
         raise HTTPException(status_code=503, detail="Models are still loading. Please try again in a moment.")
-    # Validate file type
     allowed_extensions = {'.wav', '.mp3', '.m4a', '.flac', '.ogg', '.webm'}
     file_ext = Path(file.filename).suffix.lower()
@@ -190,15 +190,17 @@ async def process_audio_endpoint(file: UploadFile = File(...)):
             detail=f"Unsupported file type. Allowed: {', '.join(allowed_extensions)}"
         )
-    # Save uploaded file temporarily
     with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file:
         try:
             content = await file.read()
             tmp_file.write(content)
             tmp_file_path = tmp_file.name
-            # Process audio
-            result = process_audio(tmp_file_path)
             return JSONResponse(content=result)
@@ -206,11 +208,11 @@ async def process_audio_endpoint(file: UploadFile = File(...)):
             raise HTTPException(status_code=500, detail=f"Error processing audio: {str(e)}")
         finally:
-            # Clean up temporary file
             if os.path.exists(tmp_file_path):
                 os.unlink(tmp_file_path)
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import io
 import tempfile
 from pathlib import Path
+import subprocess   # <-- Added
 warnings.filterwarnings('ignore')
 os.environ['PYTHONWARNINGS'] = 'ignore'
     allow_headers=["*"],
 )
+# Convert ANY audio file to WAV using FFmpeg
+def convert_to_wav(input_path):
+    output_path = input_path + "_converted.wav"
+    command = [
+        "ffmpeg", "-y", "-i", input_path,
+        "-ac", "1",
+        "-ar", "16000",
+        output_path
+    ]
+    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    return output_path
+# Global variables
 pipeline = None
 whisper_model = None
 @app.on_event("startup")
 async def load_models():
     global pipeline, whisper_model
     print(f"Using device: {device}")
     print("Loading Whisper small model...")
     with SuppressStderr():
         whisper_model = whisper.load_model("small", device=device)
     print("Models loaded successfully!\n")
 def process_audio(audio_path):
     if not os.path.exists(audio_path):
         raise FileNotFoundError(f"Audio file not found: {audio_path}")
 @app.get("/")
 async def root():
     return {
         "message": "Speaker Diarization & Transcription API",
         "version": "1.0.0",
 @app.get("/health")
 async def health_check():
     return {
         "status": "healthy",
         "device": str(device),
 @app.post("/process")
 async def process_audio_endpoint(file: UploadFile = File(...)):
     if pipeline is None or whisper_model is None:
         raise HTTPException(status_code=503, detail="Models are still loading. Please try again in a moment.")
     allowed_extensions = {'.wav', '.mp3', '.m4a', '.flac', '.ogg', '.webm'}
     file_ext = Path(file.filename).suffix.lower()
             detail=f"Unsupported file type. Allowed: {', '.join(allowed_extensions)}"
         )
     with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file:
         try:
             content = await file.read()
             tmp_file.write(content)
             tmp_file_path = tmp_file.name
+            # Convert ANY format to WAV
+            wav_path = convert_to_wav(tmp_file_path)
+            # Process WAV only
+            result = process_audio(wav_path)
             return JSONResponse(content=result)
             raise HTTPException(status_code=500, detail=f"Error processing audio: {str(e)}")
         finally:
             if os.path.exists(tmp_file_path):
                 os.unlink(tmp_file_path)
+            if os.path.exists(tmp_file_path + "_converted.wav"):
+                os.unlink(tmp_file_path + "_converted.wav")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)