vineetshukla.work@gmail.com
final commit
c5c9261
import io
import base64
import librosa
import numpy as np
import soundfile as sf
from fastapi import HTTPException
from app.config import settings
import logging
logger = logging.getLogger(__name__)
def decode_base64_audio(base64_string: str) -> io.BytesIO:
    """
    Turn a Base64-encoded audio payload into an in-memory binary stream.

    If the input contains the marker "base64," (as in a data URI such as
    "data:audio/mp3;base64,...."), the text following the first marker is
    decoded; otherwise the whole string is decoded as-is.

    Returns:
        A BytesIO positioned at offset 0 that holds the decoded bytes.

    Raises:
        HTTPException: status 400 when anything goes wrong while stripping
            the prefix or decoding (e.g. malformed padding, non-string input).
    """
    marker = "base64,"
    try:
        payload = base64_string
        if marker in payload:
            # Drop the data-URI header and keep the segment right after it.
            payload = payload.split(marker)[1]
        raw_bytes = base64.b64decode(payload)
        return io.BytesIO(raw_bytes)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid Base64 audio: {str(e)}")
def preprocess_audio(audio_file: io.BytesIO) -> np.ndarray:
    """
    Clean and standardized preprocessing for AI detection.

    Focuses on natural signal preservation to avoid false AI classifications.
    The stream is spooled to a temporary file, loaded with librosa at
    ``settings.SAMPLE_RATE``, then silence-trimmed, peak-normalized and
    clamped to at most 30 seconds.

    Args:
        audio_file: Binary stream holding the encoded audio. It is read from
            its current position to the end.

    Returns:
        The processed 1-D waveform as a NumPy array.

    Raises:
        HTTPException: status 400 if the audio cannot be written, decoded,
            or contains no samples.
    """
    import tempfile
    import os

    silence_top_db = 40      # trim threshold in dB below peak
    min_kept_seconds = 0.1   # reject the trim if it leaves less than this
    max_duration = 30        # seconds of audio kept after clamping

    try:
        tmp_path = None
        try:
            # librosa is given a real file path, so spool the stream to disk.
            # The path is recorded *before* writing so that a failed write
            # still gets cleaned up by the finally clause below.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
                tmp_path = tmp_file.name
                tmp_file.write(audio_file.read())

            # Load audio at the configured rate (the original notes 16 kHz,
            # the standard for Wav2Vec2).
            y, sr = librosa.load(tmp_path, sr=settings.SAMPLE_RATE)

            # Ensure mono
            if y.ndim > 1:
                y = librosa.to_mono(y)

            # Fail early with a clear message instead of letting np.max
            # raise on a zero-size array further down.
            if y.size == 0:
                raise ValueError("Audio contains no samples")

            # 1. Basic silence trimming (safer threshold). Only adopt the
            # trimmed signal if not too much was cut.
            y_trimmed, _ = librosa.effects.trim(y, top_db=silence_top_db)
            if len(y_trimmed) > sr * min_kept_seconds:
                y = y_trimmed

            # 2. Gentle normalization: plain peak normalization rather than a
            # target RMS, which preserves the natural dynamics that models
            # use for detection. An all-zero signal is left untouched.
            peak = np.max(np.abs(y))
            if peak > 0:
                y = y / peak

            # 3. Time clamping. int() keeps the slice bound valid even if the
            # configured sample rate is stored as a float.
            max_samples = int(sr * max_duration)
            if len(y) > max_samples:
                y = y[:max_samples]

            logger.info(f"Natural preprocessing complete: {len(y)/sr:.2f}s")
            return y
        finally:
            if tmp_path is not None and os.path.exists(tmp_path):
                os.unlink(tmp_path)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error processing audio file: {str(e)}") from e