Spaces:

shivam-2211
/

voice-detection-api

Sleeping

shivam0897-i committed on Feb 25

Commit

4bfc577

1 Parent(s): 8a6ab53

chore: clean codebase for production

- Remove fix-tag comments and verbose inline comments
- Convert f-string loggers to lazy %s formatting
- Clean up trailing whitespace and blank lines
- Update .gitignore to exclude non-production files
- Remove tracked test artifacts (evaluation_results, test_my_api)

Files changed (7) hide show

.gitignore +18 -12
audio_utils.py +3 -9
config.py +1 -1
evaluation_results.json +0 -50
main.py +53 -154
model.py +28 -70
test_my_api.py +0 -171

.gitignore CHANGED Viewed

@@ -39,18 +39,10 @@ Thumbs.db
 fine_tuned_model/
 training/
-# === Non-production files (keep out of HF Space) ===
-# Tests
-tests/
-pytest.ini
-# Docs and reports
-docs/
-# Dev/validation scripts
-scripts/
-scenario_validation_cases.py
 # Test request fixtures
 test_request.json
@@ -59,3 +51,17 @@ test_valid.json
 # Helper/patch scripts
 _fix_*.py
 _test_*.py

 fine_tuned_model/
 training/
+# Test artifacts (generated output)
+evaluation_results.json
+test_my_api.py
+run_final_tests.py
 # Test request fixtures
 test_request.json
 # Helper/patch scripts
 _fix_*.py
 _test_*.py
+# Download folders
+drive-download-*/
+# Local docs / tests / scripts (not deployed)
+docs/
+tests/
+scripts/
+pytest.ini
+scenario_validation_cases.py
+realtime-analysis-*.json
+# Python project metadata (not needed for deployment)
+pyproject.toml

audio_utils.py CHANGED Viewed

@@ -11,7 +11,6 @@ import numpy as np
 import librosa
 import soundfile as sf
-# Configure logging
 logger = logging.getLogger(__name__)
 # Magic bytes for common audio formats
@@ -106,38 +105,33 @@ def load_audio_from_bytes(audio_bytes: bytes, target_sr: int = 22050, audio_form
     Raises:
         ValueError: If audio cannot be loaded or is invalid
     """
-    # Validate audio content BEFORE attempting to decode
     is_valid, validation_result = validate_audio_content(audio_bytes)
     if not is_valid:
         raise ValueError(f"Invalid audio file: {validation_result}")
-    logger.info(f"Audio validation passed. Detected format hint: {validation_result}")
     tmp_path = None
     try:
-        # Normalize format
         audio_format = audio_format.lower().strip()
         if audio_format.startswith("."):
             audio_format = audio_format[1:]
-        # Validate format (security)
         if not audio_format.isalnum() or len(audio_format) > 5:
             raise ValueError(f"Invalid audio format: {audio_format}")
-        # Write to temp file for librosa
         with tempfile.NamedTemporaryFile(suffix=f".{audio_format}", delete=False) as tmp_file:
             tmp_file.write(audio_bytes)
             tmp_path = tmp_file.name
-        # Load audio with librosa
         audio, sr = librosa.load(tmp_path, sr=target_sr, mono=True)
-        # Validate loaded audio
         if len(audio) == 0:
             raise ValueError("Audio file is empty or could not be decoded")
         duration = len(audio) / sr
-        logger.info(f"Audio loaded successfully: {duration:.2f}s at {sr}Hz")
         return audio, sr

 import librosa
 import soundfile as sf
 logger = logging.getLogger(__name__)
 # Magic bytes for common audio formats
     Raises:
         ValueError: If audio cannot be loaded or is invalid
     """
     is_valid, validation_result = validate_audio_content(audio_bytes)
     if not is_valid:
         raise ValueError(f"Invalid audio file: {validation_result}")
+    logger.info("Audio validation passed. Detected format hint: %s", validation_result)
     tmp_path = None
     try:
         audio_format = audio_format.lower().strip()
         if audio_format.startswith("."):
             audio_format = audio_format[1:]
+        # Reject suspicious format strings
         if not audio_format.isalnum() or len(audio_format) > 5:
             raise ValueError(f"Invalid audio format: {audio_format}")
         with tempfile.NamedTemporaryFile(suffix=f".{audio_format}", delete=False) as tmp_file:
             tmp_file.write(audio_bytes)
             tmp_path = tmp_file.name
         audio, sr = librosa.load(tmp_path, sr=target_sr, mono=True)
         if len(audio) == 0:
             raise ValueError("Audio file is empty or could not be decoded")
         duration = len(audio) / sr
+        logger.info("Audio loaded: %.2fs at %dHz", duration, sr)
         return audio, sr

config.py CHANGED Viewed

@@ -178,7 +178,7 @@ class Settings(BaseSettings):
         description="Mask sensitive entities from transcript before returning response"
     )
-    # WebSocket limits (M8 fix)
     WS_MAX_DURATION_SECONDS: int = Field(
         default=1800,
         description="Maximum WebSocket connection duration in seconds (30 min)"

         description="Mask sensitive entities from transcript before returning response"
     )
+    # WebSocket limits
     WS_MAX_DURATION_SECONDS: int = Field(
         default=1800,
         description="Maximum WebSocket connection duration in seconds (30 min)"

evaluation_results.json DELETED Viewed

@@ -1,50 +0,0 @@
-{
-  "finalScore": 100,
-  "totalFiles": 5,
-  "scorePerFile": 20.0,
-  "successfulClassifications": 5,
-  "wrongClassifications": 0,
-  "failedTests": 0,
-  "fileResults": [
-    {
-      "fileIndex": 0,
-      "status": "success",
-      "matched": true,
-      "score": 20.0,
-      "actualClassification": "AI_GENERATED",
-      "confidenceScore": 0.99
-    },
-    {
-      "fileIndex": 1,
-      "status": "success",
-      "matched": true,
-      "score": 20.0,
-      "actualClassification": "HUMAN",
-      "confidenceScore": 0.99
-    },
-    {
-      "fileIndex": 2,
-      "status": "success",
-      "matched": true,
-      "score": 20.0,
-      "actualClassification": "AI_GENERATED",
-      "confidenceScore": 0.99
-    },
-    {
-      "fileIndex": 3,
-      "status": "success",
-      "matched": true,
-      "score": 20.0,
-      "actualClassification": "HUMAN",
-      "confidenceScore": 0.99
-    },
-    {
-      "fileIndex": 4,
-      "status": "success",
-      "matched": true,
-      "score": 20.0,
-      "actualClassification": "AI_GENERATED",
-      "confidenceScore": 0.99
-    }
-  ]
-}

main.py CHANGED Viewed

@@ -27,14 +27,12 @@ from slowapi import Limiter, _rate_limit_exceeded_handler
 from slowapi.util import get_remote_address
 from slowapi.errors import RateLimitExceeded
-# Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
-# Rate limiting
 limiter = Limiter(key_func=get_remote_address, default_limits=["1000/minute"])
 from audio_utils import decode_base64_audio, load_audio_from_bytes
@@ -50,7 +48,6 @@ try:
 except Exception:  # pragma: no cover - optional dependency
     redis = None
-# Computed constraints
 MAX_AUDIO_BASE64_LENGTH = settings.MAX_AUDIO_SIZE_MB * 1024 * 1024 * 4 // 3
@@ -88,14 +85,13 @@ class SessionState:
 SESSION_STORE: Dict[str, SessionState] = {}
 SESSION_LOCK = asyncio.Lock()
-SESSION_LOCKS: Dict[str, asyncio.Lock] = {}  # Per-session locks (M1)
 SESSION_STORE_BACKEND_ACTIVE = "memory"
 REDIS_CLIENT: Any = None
 ASR_INFLIGHT_TASKS: set[asyncio.Task] = set()
 ASR_INFLIGHT_LOCK = asyncio.Lock()
 def use_redis_session_store() -> bool:
     """Return whether redis-backed session store is active."""
     return SESSION_STORE_BACKEND_ACTIVE == "redis" and REDIS_CLIENT is not None
@@ -296,24 +292,23 @@ def run_startup_warmups() -> None:
 # Detect environment
 if settings.SPACE_ID:
-    logger.info(f"Running on HuggingFace Spaces: {settings.SPACE_ID}")
 def get_session_lock(session_id: str) -> asyncio.Lock:
-    """Return a per-session lock, creating one if needed (M1 fix)."""
     if session_id not in SESSION_LOCKS:
         SESSION_LOCKS[session_id] = asyncio.Lock()
     return SESSION_LOCKS[session_id]
 async def _periodic_session_purge(interval: int = 60) -> None:
-    """Background task: purge expired sessions every *interval* seconds (M2 fix)."""
     while True:
         try:
             await asyncio.sleep(interval)
             async with SESSION_LOCK:
                 removed = purge_expired_sessions()
-                # Also clean up per-session locks for removed sessions
                 stale_lock_keys = [k for k in SESSION_LOCKS if k not in SESSION_STORE]
                 for k in stale_lock_keys:
                     del SESSION_LOCKS[k]
@@ -335,14 +330,13 @@ async def lifespan(app: FastAPI):
         preload_model()
         logger.info("ML model loaded successfully")
     except Exception as e:
-        logger.error(f"Failed to preload model: {e}")
     try:
         await asyncio.to_thread(run_startup_warmups)
     except Exception as exc:
         logger.warning("Startup warm-ups encountered an issue: %s", exc)
-    # Background periodic purge task (M2 fix: avoid purging on every request)
     purge_task = asyncio.create_task(_periodic_session_purge())
     yield
@@ -355,7 +349,6 @@ async def lifespan(app: FastAPI):
     logger.info("Shutting down...")
-# Initialize FastAPI app with lifespan
 app = FastAPI(
     title="AI Voice Detection API",
     description="Detects whether a voice sample is AI-generated or spoken by a real human",
@@ -369,14 +362,9 @@ app = FastAPI(
     lifespan=lifespan
 )
-# Add rate limiter to app state
 app.state.limiter = limiter
 app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
-# Middleware configuration
-# CORS
-# Note: Set ALLOWED_ORIGINS env var in production
-# L2 fix: disable credentials for wildcard origins (browser ignores Set-Cookie anyway)
 _cors_origins = settings.ALLOWED_ORIGINS
 _cors_credentials = "*" not in _cors_origins
 if not _cors_credentials:
@@ -389,38 +377,29 @@ app.add_middleware(
     allow_headers=["Content-Type", "x-api-key", "Authorization"],
 )
-# Request Logging & Timing Middleware
 @app.middleware("http")
 async def log_requests(request: Request, call_next):
-    # Generate request ID and start timer
     request_id = str(uuid.uuid4())[:8]
     request.state.request_id = request_id
     start_time = time.perf_counter()
-    # Log request start
     method = request.method
     path = request.url.path
     if method == "POST":
-        logger.info(f"[{request_id}] [START] {method} {path}")
-    # Process request (async)
     response = await call_next(request)
-    # Calculate duration
     duration_ms = (time.perf_counter() - start_time) * 1000
     status_code = response.status_code
-    # Log request completion with timing
     if method == "POST":
         status_label = "[OK]" if status_code == 200 else "[ERR]" if status_code >= 400 else "[WARN]"
-        logger.info(f"[{request_id}] {status_label} END {method} {path} -> {status_code} ({duration_ms:.0f}ms)")
-    # Add headers
     response.headers["X-Request-ID"] = request_id
     response.headers["X-Response-Time"] = f"{duration_ms:.0f}ms"
     response.headers["X-Content-Type-Options"] = "nosniff"
-    # Allow embedding in Hugging Face iframe
-    # response.headers["X-Frame-Options"] = "DENY"
     response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
     # Relax CSP to allow standard API documentation via CDNs (ReDoc/Swagger)
     response.headers["Content-Security-Policy"] = (
@@ -433,7 +412,6 @@ async def log_requests(request: Request, call_next):
     return response
-# Request/Response Models
 class VoiceDetectionRequest(BaseModel):
     """Request body for voice detection."""
     language: str = Field(default="Auto", description="Language hint (Auto, English, Hindi, Hinglish, Tamil, Malayalam, Telugu). Defaults to auto-detect.")
@@ -703,7 +681,7 @@ def validate_supported_language(language: str) -> str:
     """Validate supported language. Falls back to 'Auto' for unknown languages so the
     evaluator never gets a 400 for an unexpected language hint."""
     if language not in settings.SUPPORTED_LANGUAGES:
-        logger.warning(f"Unsupported language '{language}' — falling back to 'Auto'")
         return "Auto"
     return language
@@ -751,6 +729,7 @@ def dedupe_preserve_order(items: List[str]) -> List[str]:
 def update_session_behaviour_state(session: SessionState, language_analysis: Dict[str, Any]) -> Dict[str, Any]:
     """Update session-level behaviour score from transcript and semantic trends."""
     transcript_source = str(language_analysis.get("transcript_raw", language_analysis.get("transcript", "")))
     transcript = normalize_transcript_for_behavior(transcript_source)
     semantic_flags = list(language_analysis.get("semantic_flags", []))
     keyword_categories = list(language_analysis.get("keyword_categories", []))
@@ -1087,24 +1066,14 @@ def build_risk_update(
         confidence_audio = int(round(confidence * 100))
         anomaly_audio = int(max(0.0, min(100.0, acoustic_anomaly * 0.85)))
         audio_score = max(confidence_audio, anomaly_audio)
-        # When authenticity (signal forensics) contradicts AI classification,
-        # dampen the audio_score.  Browser-mic audio typically has
-        # authenticity 34-60, so the threshold starts low.
-        # IMPORTANT: Only dampen for mic source — file uploads should trust
-        # the model classification.
         if authenticity > 35 and _audio_source == "mic":
-            # Scale factor: authenticity 35 → 1.0 (no change),
-            #               authenticity 55 → 0.80,
-            #               authenticity 80 → 0.55
             auth_dampen = max(0.50, 1.0 - (authenticity - 35) / 100.0)
             audio_score = int(round(audio_score * auth_dampen))
     else:
         authenticity_audio_score = int(max(0, min(100, (50.0 - authenticity) * 1.2)))
-        # Browser mic naturally has higher spectral anomaly (40-78) due to
-        # noise floor and frequency response. Use 0.55 multiplier for mic
-        # (was 0.70) so anomaly 60 → score 33 instead of 42, keeping
-        # HUMAN chunks in LOW risk range where they belong.
-        # File uploads use standard 0.90 multiplier.
         _anomaly_mult = 0.55 if _audio_source == "mic" else 0.90
         anomaly_audio_score = int(max(0.0, min(100.0, acoustic_anomaly * _anomaly_mult)))
         audio_score = max(authenticity_audio_score, anomaly_audio_score)
@@ -1163,21 +1132,14 @@ def build_risk_update(
         delta_boost = int(delta * settings.RISK_DELTA_BOOST_FACTOR)
         risk_score = min(100, risk_score + delta_boost)
-    # ── P2a: Risk dampening — prevent single-chunk LOW→CRITICAL ────
-    # If previous score was below 60 (LOW/MEDIUM) and new score jumps
-    # to CRITICAL (>=80), cap at 79 unless 2+ recent HIGH scores in
-    # the session history support the escalation.
     if previous_score is not None and previous_score < 60 and risk_score >= 80:
         recent_high = sum(1 for s in _risk_history[-5:] if s >= 60)
         if recent_high < 2:
             risk_score = min(risk_score, 79)
             behaviour_signals.append("risk_dampened_no_prior_high")
-    # ── L3 fix: First-chunk guard ──────────────────────────────────────
-    # The very first chunk often contains connection noise / silence.
-    # Cap its risk at 60 (MEDIUM) so one noisy handshake doesn't set
-    # the session trajectory high — UNLESS there's a strong positive
-    # signal (AI voice, high acoustic anomaly, or fraud keywords).
     if _chunks_processed == 0 and risk_score > 60:
         has_strong_signal = (
             (classification == "AI_GENERATED" and confidence >= 0.80)
@@ -1189,10 +1151,7 @@ def build_risk_update(
             risk_score = 60
             behaviour_signals.append("first_chunk_capped")
-    # ── M4 fix: Cumulative risk escalation for sustained moderate signals ──
-    # If 3+ of the last 5 chunks scored ≥40 AND the current chunk also
-    # scores ≥40, apply a cumulative boost (3 pts per recent moderate chunk,
-    # max +15). This ensures sustained low-grade fraud eventually triggers alerts.
     if len(_risk_history) >= 3 and risk_score >= 40:
         recent_moderate = sum(1 for s in _risk_history[-5:] if s >= 40)
         if recent_moderate >= 3:
@@ -1200,23 +1159,15 @@ def build_risk_update(
             risk_score = min(100, risk_score + cumulative_boost)
             behaviour_signals.append("sustained_moderate_risk")
-    # ── P2b: Sustained AI voice escalation ───────────────────────────
-    # Instead of a flat floor at 70, escalate based on how many chunks
-    # have been classified as AI.  floor = 70 + min(20, ai_chunks * 5)
-    # This means: 1 AI chunk → 75, 2 → 80, 3 → 85, 4+ → 90.
-    # Raised confidence threshold to 0.92 (was 0.85) because with
-    # temperature scaling T=4.0, the softened model outputs 0.67-0.84
-    # for browser mic audio. Only truly confident AI predictions should
-    # trigger this floor escalation.
     if classification == "AI_GENERATED" and confidence >= 0.92:
         ai_floor = 70 + min(20, _voice_ai_chunks * 5)
         risk_score = max(risk_score, ai_floor)
         if _voice_ai_chunks >= 2:
             behaviour_signals.append("sustained_ai_voice")
-    # ── P1: AI-voice-aware CPI ───────────────────────────────────────
-    # Add an AI-voice ratio component so CPI doesn't stay at 0 when
-    # the only signal is the model detecting synthetic voice.
     _ai_ratio = (_voice_ai_chunks / max(1, _chunks_processed)) if _chunks_processed > 0 else 0.0
     if previous_score is None:
         cpi = min(100.0, max(0.0,
@@ -1264,11 +1215,7 @@ def build_risk_update(
         or any(signal in behaviour_signals for signal in strong_intent)
     )
-    # ── P5: First-chunk alert guard ──────────────────────────────────
-    # On the very first chunk (_chunks_processed == 0), suppress the
-    # alert unless CRITICAL (risk >= 80) or strong semantic intent.
-    # This prevents a single false-positive chunk from triggering an
-    # alert that will persist in the session history.
     if alert_triggered and _chunks_processed == 0:
         has_strong_intent = any(s in behaviour_signals for s in strong_intent)
         if risk_level != "CRITICAL" and not has_strong_intent:
@@ -1424,14 +1371,9 @@ async def process_audio_chunk(
         f"({analysis_result.confidence_score:.0%}) in {analyze_ms:.0f}ms"
     )
-    # ── Short-chunk guard (bidirectional) ────────────────────────────
-    # Audio segments shorter than 2 s give the classifier insufficient
-    # spectral context, leading to unreliable predictions in both
-    # directions (e.g. a 1.6 s human tail flipping to AI 100%, or a
-    # short synthetic tail flipping to HUMAN 99%).  When the session
-    # already has a clear majority classification, we carry that
-    # forward instead of trusting a sub-2-second segment.
-    MIN_RELIABLE_DURATION = 2.0  # seconds
     if duration_sec < MIN_RELIABLE_DURATION:
         async with SESSION_LOCK:
             _sess = get_session_state(session_id)
@@ -1625,11 +1567,7 @@ async def process_audio_chunk(
             session.final_voice_classification = voice_classification
             session.final_voice_confidence = voice_confidence
-        # ── P4: Reconcile final_call_label with majority vote ────────
-        # If the majority vote says HUMAN but the watermark-based label
-        # is FRAUD, downgrade.  Use average risk (not max) to decide
-        # between SPAM and SAFE — a single spike shouldn't override an
-        # otherwise clean session.
         if session.final_voice_classification == "HUMAN" and session.final_call_label == "FRAUD":
             avg_risk = sum(session.risk_history) / max(1, len(session.risk_history))
             session.final_call_label = "SPAM" if avg_risk >= 30 else "SAFE"
@@ -1638,19 +1576,14 @@ async def process_audio_chunk(
         elif session.final_voice_classification == "AI_GENERATED" and session.final_call_label == "SAFE":
             session.final_call_label = "SPAM"
-        # ── P5: Average risk sanity check ────────────────────────────
-        # When the average risk across all chunks is LOW (< 35) but the
-        # label is FRAUD (because one or two spikes hit max_risk_score),
-        # downgrade to SPAM.  A session where 80%+ of chunks are SAFE
-        # should not be labelled FRAUD — the spikes were likely
-        # misclassifications from browser mic audio artifacts.
         if session.final_call_label == "FRAUD" and session.chunks_processed >= 5:
             avg_risk = sum(session.risk_history) / max(1, len(session.risk_history))
             if avg_risk < 35:
                 session.final_call_label = "SPAM"
                 logger.info(
-                    f"P5 sanity: downgraded FRAUD → SPAM (avg_risk={avg_risk:.1f}, "
-                    f"chunks={session.chunks_processed})"
                 )
         if scored["alert"].triggered:
@@ -1852,7 +1785,7 @@ async def analyze_realtime_chunk(
 @app.websocket("/api/voice-detection/v1/session/{session_id}/stream")
 async def stream_realtime_session(websocket: WebSocket, session_id: str):
     """WebSocket endpoint for continuous chunk-based analysis."""
-    # L4 fix: accept auth via query param (legacy) OR first-message auth
     has_query_key = verify_websocket_api_key(websocket)
     if not has_query_key:
         # No query-param key — accept connection and require first-message auth
@@ -1872,7 +1805,7 @@ async def stream_realtime_session(websocket: WebSocket, session_id: str):
     request_id = f"ws-{session_id[:8]}"
     ws_start = time.time()
-    # L4 fix: if no query-param key, require first-message auth
     if not has_query_key:
         try:
             auth_msg = await asyncio.wait_for(websocket.receive_json(), timeout=10.0)
@@ -1890,7 +1823,7 @@ async def stream_realtime_session(websocket: WebSocket, session_id: str):
     try:
         while True:
-            # M8 fix: enforce max connection duration
             elapsed = time.time() - ws_start
             if elapsed >= settings.WS_MAX_DURATION_SECONDS:
                 await websocket.send_json({
@@ -1900,7 +1833,7 @@ async def stream_realtime_session(websocket: WebSocket, session_id: str):
                 await websocket.close(code=1000, reason="Max duration exceeded")
                 break
-            # M8 fix: enforce idle timeout
             try:
                 payload = await asyncio.wait_for(
                     websocket.receive_json(),
@@ -1932,7 +1865,7 @@ async def stream_realtime_session(websocket: WebSocket, session_id: str):
             except ValueError as e:
                 await websocket.send_json({"status": "error", "message": str(e)})
     except WebSocketDisconnect:
-        logger.info(f"[{request_id}] WebSocket disconnected")
 @app.get("/v1/session/{session_id}/summary", response_model=SessionSummaryResponse, include_in_schema=False)
@@ -2035,40 +1968,28 @@ async def detect_voice(
     """
     Returns classification result with confidence score and explanation.
     """
-    # Log request info for debugging
     request_id = getattr(request.state, 'request_id', 'unknown')
-    audio_size_kb = len(voice_request.audioBase64) * 3 / 4 / 1024  # Approximate decoded size
-    logger.info(f"[{request_id}] Voice detection request: language={voice_request.language}, format={voice_request.audioFormat}, size~{audio_size_kb:.1f}KB")
     voice_request.language = validate_supported_language(voice_request.language)
     validate_supported_format(voice_request.audioFormat)
-    # Hard timeout guard: evaluator kills requests at 30s — bail at 20s with a safe fallback
     LEGACY_TIMEOUT_SECONDS = 20
     try:
-        # Step 1: Decode Base64 (async - runs in thread pool)
-        logger.info(f"[{request_id}]   -> Decoding Base64...")
         decode_start = time.perf_counter()
         audio_bytes = await asyncio.to_thread(decode_base64_audio, voice_request.audioBase64)
-        decode_time = (time.perf_counter() - decode_start) * 1000
-        # Step 2: Load audio (async - runs in thread pool)
-        logger.info(f"[{request_id}]   -> Loading audio... (decode took {decode_time:.0f}ms)")
-        load_start = time.perf_counter()
         audio, sr = await asyncio.to_thread(load_audio_from_bytes, audio_bytes, 16000, voice_request.audioFormat)
-        load_time = (time.perf_counter() - load_start) * 1000
-        # Truncate long audio to avoid timeout (keep first 20s max — plenty for classification)
         max_samples = sr * 20
         if len(audio) > max_samples:
-            logger.warning(f"[{request_id}]   -> Truncating audio from {len(audio)/sr:.1f}s to 20s for timeout safety")
             audio = audio[:max_samples]
-        # Step 3: ML Analysis (async - runs in thread pool, CPU-bound) with timeout guard
         duration_sec = len(audio) / sr
-        logger.info(f"[{request_id}]   -> Analyzing {duration_sec:.1f}s audio... (load took {load_time:.0f}ms)")
-        analyze_start = time.perf_counter()
         remaining_budget = LEGACY_TIMEOUT_SECONDS - (time.perf_counter() - decode_start)
         if remaining_budget < 2:
             raise asyncio.TimeoutError("Insufficient time budget for analysis")
@@ -2076,11 +1997,11 @@ async def detect_voice(
             asyncio.to_thread(analyze_voice, audio, sr, voice_request.language),
             timeout=max(2.0, remaining_budget)
         )
-        analyze_time = (time.perf_counter() - analyze_start) * 1000
-        logger.info(f"[{request_id}]   -> Analysis complete: {result.classification} ({result.confidence_score:.0%}) in {analyze_time:.0f}ms")
-        # Extract metrics if available
         metrics = None
         if result.features:
             metrics = ForensicMetrics(
@@ -2094,7 +2015,6 @@ async def detect_voice(
         explanation = result.explanation
         recommended_action = None
         response_classification = result.classification
-        # Never return UNCERTAIN on legacy endpoint — evaluator only accepts HUMAN / AI_GENERATED
         if model_uncertain:
             explanation = (
                 "Model uncertainty detected due fallback inference. "
@@ -2111,7 +2031,6 @@ async def detect_voice(
                 "credentials. Verify caller identity through official channels."
             )
-        # Return response
         return VoiceDetectionResponse(
             status="success",
             language=voice_request.language,
@@ -2124,13 +2043,13 @@ async def detect_voice(
         )
     except ValueError as e:
-        logger.warning(f"[{request_id}]   [VALIDATION_ERROR] {e}")
         raise HTTPException(
             status_code=400,
             detail={"status": "error", "message": str(e)}
         )
     except asyncio.TimeoutError:
-        logger.warning(f"[{request_id}]   [TIMEOUT] Legacy endpoint exceeded {LEGACY_TIMEOUT_SECONDS}s budget — returning safe fallback")
         return VoiceDetectionResponse(
             status="success",
             language=voice_request.language,
@@ -2142,10 +2061,10 @@ async def detect_voice(
             recommendedAction="Analysis took too long. Verify caller identity through official channels.",
         )
     except Exception as e:
-        logger.error(f"[{request_id}]   [PROCESSING_ERROR] {e}", exc_info=True)
         raise HTTPException(
             status_code=500,
-            detail={"status": "error", "message": f"Internal Server Error (request_id={request_id})"}
         )
@@ -2215,7 +2134,7 @@ async def http_exception_handler(request: Request, exc: HTTPException):
 @app.exception_handler(Exception)
 async def global_exception_handler(request: Request, exc: Exception):
     """Global handler to catch unhandled exceptions and prevent stack traces."""
-    logger.error(f"Unhandled error: {exc}", exc_info=True)
     return JSONResponse(
         status_code=500,
         content={"status": "error", "message": "Internal Server Error"}
@@ -2225,23 +2144,3 @@ async def global_exception_handler(request: Request, exc: Exception):
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=settings.PORT)

 from slowapi.util import get_remote_address
 from slowapi.errors import RateLimitExceeded
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 limiter = Limiter(key_func=get_remote_address, default_limits=["1000/minute"])
 from audio_utils import decode_base64_audio, load_audio_from_bytes
 except Exception:  # pragma: no cover - optional dependency
     redis = None
 MAX_AUDIO_BASE64_LENGTH = settings.MAX_AUDIO_SIZE_MB * 1024 * 1024 * 4 // 3
 SESSION_STORE: Dict[str, SessionState] = {}
 SESSION_LOCK = asyncio.Lock()
+SESSION_LOCKS: Dict[str, asyncio.Lock] = {}
 SESSION_STORE_BACKEND_ACTIVE = "memory"
 REDIS_CLIENT: Any = None
 ASR_INFLIGHT_TASKS: set[asyncio.Task] = set()
 ASR_INFLIGHT_LOCK = asyncio.Lock()
 def use_redis_session_store() -> bool:
     """Return whether redis-backed session store is active."""
     return SESSION_STORE_BACKEND_ACTIVE == "redis" and REDIS_CLIENT is not None
 # Detect environment
 if settings.SPACE_ID:
+    logger.info("Running on HuggingFace Spaces: %s", settings.SPACE_ID)
 def get_session_lock(session_id: str) -> asyncio.Lock:
+    """Return a per-session lock, creating one if needed."""
     if session_id not in SESSION_LOCKS:
         SESSION_LOCKS[session_id] = asyncio.Lock()
     return SESSION_LOCKS[session_id]
 async def _periodic_session_purge(interval: int = 60) -> None:
+    """Background task: purge expired sessions every *interval* seconds."""
     while True:
         try:
             await asyncio.sleep(interval)
             async with SESSION_LOCK:
                 removed = purge_expired_sessions()
                 stale_lock_keys = [k for k in SESSION_LOCKS if k not in SESSION_STORE]
                 for k in stale_lock_keys:
                     del SESSION_LOCKS[k]
         preload_model()
         logger.info("ML model loaded successfully")
     except Exception as e:
+        logger.error("Failed to preload model: %s", e)
     try:
         await asyncio.to_thread(run_startup_warmups)
     except Exception as exc:
         logger.warning("Startup warm-ups encountered an issue: %s", exc)
     purge_task = asyncio.create_task(_periodic_session_purge())
     yield
     logger.info("Shutting down...")
 app = FastAPI(
     title="AI Voice Detection API",
     description="Detects whether a voice sample is AI-generated or spoken by a real human",
     lifespan=lifespan
 )
 app.state.limiter = limiter
 app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
 _cors_origins = settings.ALLOWED_ORIGINS
 _cors_credentials = "*" not in _cors_origins
 if not _cors_credentials:
     allow_headers=["Content-Type", "x-api-key", "Authorization"],
 )
 @app.middleware("http")
 async def log_requests(request: Request, call_next):
     request_id = str(uuid.uuid4())[:8]
     request.state.request_id = request_id
     start_time = time.perf_counter()
     method = request.method
     path = request.url.path
     if method == "POST":
+        logger.info("[%s] [START] %s %s", request_id, method, path)
     response = await call_next(request)
     duration_ms = (time.perf_counter() - start_time) * 1000
     status_code = response.status_code
     if method == "POST":
         status_label = "[OK]" if status_code == 200 else "[ERR]" if status_code >= 400 else "[WARN]"
+        logger.info("[%s] %s END %s %s -> %s (%0.fms)", request_id, status_label, method, path, status_code, duration_ms)
     response.headers["X-Request-ID"] = request_id
     response.headers["X-Response-Time"] = f"{duration_ms:.0f}ms"
     response.headers["X-Content-Type-Options"] = "nosniff"
     response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
     # Relax CSP to allow standard API documentation via CDNs (ReDoc/Swagger)
     response.headers["Content-Security-Policy"] = (
     return response
 class VoiceDetectionRequest(BaseModel):
     """Request body for voice detection."""
     language: str = Field(default="Auto", description="Language hint (Auto, English, Hindi, Hinglish, Tamil, Malayalam, Telugu). Defaults to auto-detect.")
     """Validate supported language. Falls back to 'Auto' for unknown languages so the
     evaluator never gets a 400 for an unexpected language hint."""
     if language not in settings.SUPPORTED_LANGUAGES:
+        logger.warning("Unsupported language '%s' — falling back to 'Auto'", language)
         return "Auto"
     return language
 def update_session_behaviour_state(session: SessionState, language_analysis: Dict[str, Any]) -> Dict[str, Any]:
     """Update session-level behaviour score from transcript and semantic trends."""
     transcript_source = str(language_analysis.get("transcript_raw", language_analysis.get("transcript", "")))
     transcript = normalize_transcript_for_behavior(transcript_source)
     semantic_flags = list(language_analysis.get("semantic_flags", []))
     keyword_categories = list(language_analysis.get("keyword_categories", []))
         confidence_audio = int(round(confidence * 100))
         anomaly_audio = int(max(0.0, min(100.0, acoustic_anomaly * 0.85)))
         audio_score = max(confidence_audio, anomaly_audio)
+        # Dampen audio_score when signal forensics contradict AI classification
+        # for mic-source audio (browser mic has authenticity 34-60 naturally).
         if authenticity > 35 and _audio_source == "mic":
             auth_dampen = max(0.50, 1.0 - (authenticity - 35) / 100.0)
             audio_score = int(round(audio_score * auth_dampen))
     else:
         authenticity_audio_score = int(max(0, min(100, (50.0 - authenticity) * 1.2)))
+        # Mic audio has higher spectral anomaly (40-78); use lower multiplier.
         _anomaly_mult = 0.55 if _audio_source == "mic" else 0.90
         anomaly_audio_score = int(max(0.0, min(100.0, acoustic_anomaly * _anomaly_mult)))
         audio_score = max(authenticity_audio_score, anomaly_audio_score)
         delta_boost = int(delta * settings.RISK_DELTA_BOOST_FACTOR)
         risk_score = min(100, risk_score + delta_boost)
+    # Risk dampening: prevent single-chunk LOW→CRITICAL jumps.
     if previous_score is not None and previous_score < 60 and risk_score >= 80:
         recent_high = sum(1 for s in _risk_history[-5:] if s >= 60)
         if recent_high < 2:
             risk_score = min(risk_score, 79)
             behaviour_signals.append("risk_dampened_no_prior_high")
+    # First-chunk guard: cap noise-only first chunks at MEDIUM.
     if _chunks_processed == 0 and risk_score > 60:
         has_strong_signal = (
             (classification == "AI_GENERATED" and confidence >= 0.80)
             risk_score = 60
             behaviour_signals.append("first_chunk_capped")
+    # Cumulative escalation for sustained moderate signals.
     if len(_risk_history) >= 3 and risk_score >= 40:
         recent_moderate = sum(1 for s in _risk_history[-5:] if s >= 40)
         if recent_moderate >= 3:
             risk_score = min(100, risk_score + cumulative_boost)
             behaviour_signals.append("sustained_moderate_risk")
+    # Sustained AI voice floor escalation.
+    # floor = 70 + min(20, ai_chunks * 5)
     if classification == "AI_GENERATED" and confidence >= 0.92:
         ai_floor = 70 + min(20, _voice_ai_chunks * 5)
         risk_score = max(risk_score, ai_floor)
         if _voice_ai_chunks >= 2:
             behaviour_signals.append("sustained_ai_voice")
+    # AI-voice-aware CPI includes synthetic voice ratio.
     _ai_ratio = (_voice_ai_chunks / max(1, _chunks_processed)) if _chunks_processed > 0 else 0.0
     if previous_score is None:
         cpi = min(100.0, max(0.0,
         or any(signal in behaviour_signals for signal in strong_intent)
     )
+    # First-chunk alert guard: suppress unless CRITICAL or strong intent.
     if alert_triggered and _chunks_processed == 0:
         has_strong_intent = any(s in behaviour_signals for s in strong_intent)
         if risk_level != "CRITICAL" and not has_strong_intent:
         f"({analysis_result.confidence_score:.0%}) in {analyze_ms:.0f}ms"
     )
+    # Short-chunk guard: sub-2s segments are unreliable; carry forward
+    # the session's majority classification instead.
+    MIN_RELIABLE_DURATION = 2.0
     if duration_sec < MIN_RELIABLE_DURATION:
         async with SESSION_LOCK:
             _sess = get_session_state(session_id)
             session.final_voice_classification = voice_classification
             session.final_voice_confidence = voice_confidence
+        # Reconcile final_call_label with majority vote.
         if session.final_voice_classification == "HUMAN" and session.final_call_label == "FRAUD":
             avg_risk = sum(session.risk_history) / max(1, len(session.risk_history))
             session.final_call_label = "SPAM" if avg_risk >= 30 else "SAFE"
         elif session.final_voice_classification == "AI_GENERATED" and session.final_call_label == "SAFE":
             session.final_call_label = "SPAM"
+        # Average risk sanity check: downgrade FRAUD when most chunks are LOW.
         if session.final_call_label == "FRAUD" and session.chunks_processed >= 5:
             avg_risk = sum(session.risk_history) / max(1, len(session.risk_history))
             if avg_risk < 35:
                 session.final_call_label = "SPAM"
                 logger.info(
+                    "Sanity: downgraded FRAUD -> SPAM (avg_risk=%.1f, chunks=%d)",
+                    avg_risk, session.chunks_processed,
                 )
         if scored["alert"].triggered:
 @app.websocket("/api/voice-detection/v1/session/{session_id}/stream")
 async def stream_realtime_session(websocket: WebSocket, session_id: str):
     """WebSocket endpoint for continuous chunk-based analysis."""
+    # Accept auth via query-param or first-message token
     has_query_key = verify_websocket_api_key(websocket)
     if not has_query_key:
         # No query-param key — accept connection and require first-message auth
     request_id = f"ws-{session_id[:8]}"
     ws_start = time.time()
+    # Fall back to first-message authentication
     if not has_query_key:
         try:
             auth_msg = await asyncio.wait_for(websocket.receive_json(), timeout=10.0)
     try:
         while True:
+            # Enforce max connection duration
             elapsed = time.time() - ws_start
             if elapsed >= settings.WS_MAX_DURATION_SECONDS:
                 await websocket.send_json({
                 await websocket.close(code=1000, reason="Max duration exceeded")
                 break
+            # Enforce idle timeout
             try:
                 payload = await asyncio.wait_for(
                     websocket.receive_json(),
             except ValueError as e:
                 await websocket.send_json({"status": "error", "message": str(e)})
     except WebSocketDisconnect:
+        logger.info("[%s] WebSocket disconnected", request_id)
 @app.get("/v1/session/{session_id}/summary", response_model=SessionSummaryResponse, include_in_schema=False)
     """
     Returns classification result with confidence score and explanation.
     """
     request_id = getattr(request.state, 'request_id', 'unknown')
+    audio_size_kb = len(voice_request.audioBase64) * 3 / 4 / 1024
+    logger.info("[%s] Voice detection: lang=%s, fmt=%s, size~%.1fKB",
+                request_id, voice_request.language, voice_request.audioFormat, audio_size_kb)
     voice_request.language = validate_supported_language(voice_request.language)
     validate_supported_format(voice_request.audioFormat)
     LEGACY_TIMEOUT_SECONDS = 20
     try:
         decode_start = time.perf_counter()
         audio_bytes = await asyncio.to_thread(decode_base64_audio, voice_request.audioBase64)
         audio, sr = await asyncio.to_thread(load_audio_from_bytes, audio_bytes, 16000, voice_request.audioFormat)
         max_samples = sr * 20
         if len(audio) > max_samples:
+            logger.warning("[%s] Truncating audio from %.1fs to 20s", request_id, len(audio) / sr)
             audio = audio[:max_samples]
         duration_sec = len(audio) / sr
         remaining_budget = LEGACY_TIMEOUT_SECONDS - (time.perf_counter() - decode_start)
         if remaining_budget < 2:
             raise asyncio.TimeoutError("Insufficient time budget for analysis")
             asyncio.to_thread(analyze_voice, audio, sr, voice_request.language),
             timeout=max(2.0, remaining_budget)
         )
+        analyze_time = (time.perf_counter() - decode_start) * 1000
+        logger.info("[%s] Analysis complete: %s (%.0f%%) in %.0fms",
+                    request_id, result.classification, result.confidence_score * 100, analyze_time)
         metrics = None
         if result.features:
             metrics = ForensicMetrics(
         explanation = result.explanation
         recommended_action = None
         response_classification = result.classification
         if model_uncertain:
             explanation = (
                 "Model uncertainty detected due fallback inference. "
                 "credentials. Verify caller identity through official channels."
             )
         return VoiceDetectionResponse(
             status="success",
             language=voice_request.language,
         )
     except ValueError as e:
+        logger.warning("[%s] Validation error: %s", request_id, e)
         raise HTTPException(
             status_code=400,
             detail={"status": "error", "message": str(e)}
         )
     except asyncio.TimeoutError:
+        logger.warning("[%s] Legacy endpoint exceeded %ds budget", request_id, LEGACY_TIMEOUT_SECONDS)
         return VoiceDetectionResponse(
             status="success",
             language=voice_request.language,
             recommendedAction="Analysis took too long. Verify caller identity through official channels.",
         )
     except Exception as e:
+        logger.error("[%s] Processing error: %s", request_id, e, exc_info=True)
         raise HTTPException(
             status_code=500,
+            detail={"status": "error", "message": "Internal Server Error"}
         )
 @app.exception_handler(Exception)
 async def global_exception_handler(request: Request, exc: Exception):
     """Global handler to catch unhandled exceptions and prevent stack traces."""
+    logger.error("Unhandled error: %s", exc, exc_info=True)
     return JSONResponse(
         status_code=500,
         content={"status": "error", "message": "Internal Server Error"}
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=settings.PORT)

model.py CHANGED Viewed

@@ -14,14 +14,12 @@ import warnings
 from config import settings
-# Configure logging
 logger = logging.getLogger(__name__)
-# Suppress warnings
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", category=UserWarning)
-# ── Heuristic thresholds (M7 fix: centralised for easy tuning) ──────
 HEURISTIC_THRESHOLDS = {
     # Pitch scoring
     "pitch_optimal_stability": float(os.getenv("PITCH_OPTIMAL_STABILITY", "0.20")),
@@ -46,7 +44,6 @@ _model = None
 _processor = None
 _device = None
 @dataclass
 class AnalysisResult:
     """Result of voice analysis."""
@@ -64,7 +61,7 @@ def get_device():
             _device = "cuda"
         else:
             _device = "cpu"
-        logger.info(f"Using device: {_device}")
     return _device
@@ -75,11 +72,9 @@ def _detect_label_inversion(model):
     """Check once at load time whether this model needs label flipping."""
     global _invert_labels
     name = getattr(model.config, '_name_or_path', '').lower()
-    if 'shivam-2211' in name or 'voice-detection-model' in name:
-        _invert_labels = True
-        logger.info("Model has inverted training labels — label flip enabled (logged once)")
-    else:
-        _invert_labels = False
 def load_model():
@@ -102,10 +97,10 @@ def load_model():
         backup_model = settings.VOICE_MODEL_BACKUP_ID
         if os.path.exists(local_path):
-            logger.info(f"Loading local fine-tuned model from: {local_path}")
             model_name = local_path
         else:
-            logger.info(f"Loading model from HuggingFace Hub: {hf_model}")
             model_name = hf_model
         try:
@@ -113,10 +108,10 @@ def load_model():
             _model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
             _model.to(get_device())
             _model.eval()
-            logger.info(f"Model loaded successfully: {model_name}")
             _detect_label_inversion(_model)
         except Exception as e:
-            logger.error(f"Failed to load model {model_name}: {e}")
             if model_name != backup_model:
                 logger.warning("Trying backup model...")
                 model_name = backup_model
@@ -125,7 +120,7 @@ def load_model():
                     _model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
                     _model.to(get_device())
                     _model.eval()
-                    logger.info(f"Backup model loaded: {model_name}")
                     _detect_label_inversion(_model)
                 except Exception as e2:
                     raise RuntimeError(f"Could not load any model: {e2}")
@@ -212,7 +207,7 @@ def extract_signal_features(audio: np.ndarray, sr: int, fast_mode: bool = False)
         features["harmonic_noise_ratio_db"] = hnr_db
     except Exception as e:
-        logger.warning(f"Feature extraction error: {e}")
         features = {
             "pitch_stability": 0.5,
             "jitter": 0.05,
@@ -478,12 +473,12 @@ def classify_with_model(audio: np.ndarray, sr: int) -> Tuple[str, float]:
     model, processor = load_model()
     device = get_device()
-    # Normalize audio to prevent clipping issues
     max_val = np.max(np.abs(audio))
     if max_val > 0:
         audio = audio / max_val
-    # Resample to 16kHz if needed (Wav2Vec2 expects 16kHz)
     target_sr = 16000
     if sr != target_sr:
         audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
@@ -495,33 +490,25 @@ def classify_with_model(audio: np.ndarray, sr: int) -> Tuple[str, float]:
         return_tensors="pt",
         padding=True
     )
-    # Move to device
     inputs = {k: v.to(device) for k, v in inputs.items()}
-    # Run inference
     with torch.no_grad():
         outputs = model(**inputs)
         logits = outputs.logits
-        # Temperature scaling: divide logits by T > 1 to soften the
-        # probability distribution.  The model routinely saturates at
-        # 1.00 confidence for browser-mic audio, leaving zero room for
-        # the heuristic cross-check to correct a wrong classification.
         temperature = float(settings.MODEL_LOGIT_TEMPERATURE)
         if temperature > 1.0:
             logits = logits / temperature
         probabilities = torch.softmax(logits, dim=-1)
         # Get prediction
         predicted_class = torch.argmax(probabilities, dim=-1).item()
         confidence = probabilities[0][predicted_class].item()
-    # Map class to label using the model's id2label config.
-    # IMPORTANT: HuggingFace stores id2label with STRING keys ("0", "1")
-    # but predicted_class from torch.argmax().item() is an int.
-    # We must normalise the keys to int so .get() actually matches.
     raw_id2label = getattr(model.config, 'id2label', None) or {}
     id2label = {int(k): v for k, v in raw_id2label.items()}
     label = id2label.get(predicted_class, 'UNKNOWN')
@@ -532,21 +519,10 @@ def classify_with_model(audio: np.ndarray, sr: int) -> Tuple[str, float]:
         [f"{p:.4f}" for p in probabilities[0].cpu().tolist()],
     )
-    # ── Label interpretation ──
-    # The primary model (shivam-2211/voice-detection-model) was trained with
-    # inverted label semantics: its class-0 output actually corresponds to
-    # REAL/human audio and class-1 to FAKE/AI-generated, despite the config
-    # claiming 0=FAKE and 1=REAL.  Detected once at load time via
-    # _detect_label_inversion().
     if _invert_labels:
-        # Flip: treat model class-0 as REAL, class-1 as FAKE
-        if predicted_class == 0:
-            classification = "HUMAN"
-        else:
-            classification = "AI_GENERATED"
-        # confidence stays the same (model's own softmax output)
     else:
-        # Standard mapping: use labels from config
         if label.upper() in ['FAKE', 'SPOOF', 'SYNTHETIC', 'AI']:
             classification = "AI_GENERATED"
         else:
@@ -586,7 +562,7 @@ def analyze_voice(audio: np.ndarray, sr: int, language: str = "English", realtim
         try:
             classification, ml_confidence = classify_with_model(audio, sr)
         except Exception as e:
-            logger.error(f"ML model error: {e}, falling back to signal analysis")
             ml_fallback = True
             classification = "HUMAN"
             ml_confidence = 0.5
@@ -611,39 +587,21 @@ def analyze_voice(audio: np.ndarray, sr: int, language: str = "English", realtim
         ml_confidence = ai_probability if classification == "AI_GENERATED" else (1.0 - ai_probability)
         ml_confidence = float(max(0.5, min(0.99, ml_confidence)))
-    # ── Authenticity cross-check (REALTIME ONLY) ─────────────────────
-    # When the model says AI_GENERATED but the signal forensics indicate
-    # human-like audio (high authenticity), moderate the confidence.
-    # This prevents a poorly-calibrated model from steamrolling the
-    # heuristic evidence.  The model was fine-tuned on curated datasets
-    # and can misclassify real browser-mic audio as synthetic.
-    #
-    # IMPORTANT: This override is ONLY for realtime browser-mic sessions.
-    # File uploads use clean audio paths and the model's classification
-    # should be trusted.  Applying the override to file uploads would
-    # cause real AI-generated audio to be misclassified as HUMAN.
-    #
-    # Browser-mic audio typically has authenticity 34-60 and anomaly 40-78
-    # (naturally higher noise floor and spectral irregularity). The
-    # thresholds must reflect these real-world ranges.
     if realtime and source == "mic" and classification == "AI_GENERATED" and authenticity_score > 35:
-        # The higher the authenticity, the more we moderate.
-        # authenticity 35 → no change.  authenticity 60 → cap at ~0.75
-        # authenticity 80 → cap at ~0.55
         moderation_factor = max(0.50, 1.0 - (authenticity_score - 35) / 100.0)
         if ml_confidence > moderation_factor:
             logger.info(
-                "Authenticity cross-check: moderated AI confidence %.2f → %.2f "
                 "(authenticity=%.1f, anomaly=%.1f)",
                 ml_confidence, moderation_factor,
                 authenticity_score, acoustic_anomaly_score,
             )
             ml_confidence = moderation_factor
-        # If authenticity indicates human-like features (>40) and anomaly
-        # is not extreme (<65), override the classification — the signal
-        # evidence strongly contradicts the model. Browser mic audio
-        # naturally has anomaly 22-64 and authenticity 34-68, so the
-        # thresholds must cover these real-world ranges.
         if authenticity_score > 40 and acoustic_anomaly_score < 65:
             logger.info(
                 "Authenticity override: flipping AI_GENERATED → HUMAN "
@@ -682,4 +640,4 @@ def preload_model():
     try:
         load_model()
     except Exception as e:
-        logger.error(f"Model preload failed: {e}")

 from config import settings
 logger = logging.getLogger(__name__)
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", category=UserWarning)
+# Heuristic thresholds (env-configurable for tuning)
 HEURISTIC_THRESHOLDS = {
     # Pitch scoring
     "pitch_optimal_stability": float(os.getenv("PITCH_OPTIMAL_STABILITY", "0.20")),
 _processor = None
 _device = None
 @dataclass
 class AnalysisResult:
     """Result of voice analysis."""
             _device = "cuda"
         else:
             _device = "cpu"
+        logger.info("Using device: %s", _device)
     return _device
     """Check once at load time whether this model needs label flipping."""
     global _invert_labels
     name = getattr(model.config, '_name_or_path', '').lower()
+    _invert_labels = 'shivam-2211' in name or 'voice-detection-model' in name
+    if _invert_labels:
+        logger.info("Label inversion enabled for model: %s", name)
 def load_model():
         backup_model = settings.VOICE_MODEL_BACKUP_ID
         if os.path.exists(local_path):
+            logger.info("Loading local model from: %s", local_path)
             model_name = local_path
         else:
+            logger.info("Loading model from HuggingFace Hub: %s", hf_model)
             model_name = hf_model
         try:
             _model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
             _model.to(get_device())
             _model.eval()
+            logger.info("Model loaded: %s", model_name)
             _detect_label_inversion(_model)
         except Exception as e:
+            logger.error("Failed to load model %s: %s", model_name, e)
             if model_name != backup_model:
                 logger.warning("Trying backup model...")
                 model_name = backup_model
                     _model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
                     _model.to(get_device())
                     _model.eval()
+                    logger.info("Backup model loaded: %s", model_name)
                     _detect_label_inversion(_model)
                 except Exception as e2:
                     raise RuntimeError(f"Could not load any model: {e2}")
         features["harmonic_noise_ratio_db"] = hnr_db
     except Exception as e:
+        logger.warning("Feature extraction error: %s", e)
         features = {
             "pitch_stability": 0.5,
             "jitter": 0.05,
     model, processor = load_model()
     device = get_device()
+    # Normalize audio
     max_val = np.max(np.abs(audio))
     if max_val > 0:
         audio = audio / max_val
+    # Resample to 16kHz if needed
     target_sr = 16000
     if sr != target_sr:
         audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
         return_tensors="pt",
         padding=True
     )
     inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         outputs = model(**inputs)
         logits = outputs.logits
+        # Temperature scaling: soften probability distribution so the
+        # heuristic cross-check can still correct misclassifications.
         temperature = float(settings.MODEL_LOGIT_TEMPERATURE)
         if temperature > 1.0:
             logits = logits / temperature
         probabilities = torch.softmax(logits, dim=-1)
         # Get prediction
         predicted_class = torch.argmax(probabilities, dim=-1).item()
         confidence = probabilities[0][predicted_class].item()
+    # Normalise id2label keys from str to int (HF convention mismatch).
     raw_id2label = getattr(model.config, 'id2label', None) or {}
     id2label = {int(k): v for k, v in raw_id2label.items()}
     label = id2label.get(predicted_class, 'UNKNOWN')
         [f"{p:.4f}" for p in probabilities[0].cpu().tolist()],
     )
+    # Label interpretation — see _detect_label_inversion() for rationale.
     if _invert_labels:
+        classification = "HUMAN" if predicted_class == 0 else "AI_GENERATED"
     else:
         if label.upper() in ['FAKE', 'SPOOF', 'SYNTHETIC', 'AI']:
             classification = "AI_GENERATED"
         else:
         try:
             classification, ml_confidence = classify_with_model(audio, sr)
         except Exception as e:
+            logger.error("ML model error: %s, falling back to signal analysis", e)
             ml_fallback = True
             classification = "HUMAN"
             ml_confidence = 0.5
         ml_confidence = ai_probability if classification == "AI_GENERATED" else (1.0 - ai_probability)
         ml_confidence = float(max(0.5, min(0.99, ml_confidence)))
+    # Authenticity cross-check (realtime mic only): when the model says
+    # AI_GENERATED but signal forensics show human-like audio, moderate
+    # the confidence or flip the classification.  Not applied to file
+    # uploads where the model should be trusted.
     if realtime and source == "mic" and classification == "AI_GENERATED" and authenticity_score > 35:
         moderation_factor = max(0.50, 1.0 - (authenticity_score - 35) / 100.0)
         if ml_confidence > moderation_factor:
             logger.info(
+                "Authenticity cross-check: moderated AI confidence %.2f -> %.2f "
                 "(authenticity=%.1f, anomaly=%.1f)",
                 ml_confidence, moderation_factor,
                 authenticity_score, acoustic_anomaly_score,
             )
             ml_confidence = moderation_factor
+        # Override when signal evidence strongly contradicts the model.
         if authenticity_score > 40 and acoustic_anomaly_score < 65:
             logger.info(
                 "Authenticity override: flipping AI_GENERATED → HUMAN "
     try:
         load_model()
     except Exception as e:
+        logger.error("Model preload failed: %s", e)

test_my_api.py DELETED Viewed

@@ -1,171 +0,0 @@
-"""
-Official evaluation script from the hackathon guide, configured with our 5 test files.
-This mirrors EXACTLY what the evaluator will run.
-"""
-import requests
-import base64
-import json
-def evaluate_voice_detection_api(endpoint_url, api_key, test_files):
-    if not endpoint_url:
-        print("Error: Endpoint URL is required")
-        return False
-    if not test_files or len(test_files) == 0:
-        print("Error: No test files provided")
-        return False
-    total_files = len(test_files)
-    score_per_file = 100 / total_files
-    total_score = 0
-    file_results = []
-    print(f"\n{'='*60}")
-    print(f"Starting Evaluation")
-    print(f"{'='*60}")
-    print(f"Endpoint: {endpoint_url}")
-    print(f"Total Test Files: {total_files}")
-    print(f"Score per File: {score_per_file:.2f}")
-    print(f"{'='*60}\n")
-    for idx, file_data in enumerate(test_files):
-        language = file_data.get('language', 'English')
-        file_path = file_data.get('file_path', '')
-        expected_classification = file_data.get('expected_classification', '')
-        print(f"Test {idx + 1}/{total_files}: {file_path}")
-        if not file_path or not expected_classification:
-            file_results.append({'fileIndex': idx, 'status': 'skipped', 'score': 0})
-            print(f"   Skipped: Missing file path or expected classification\n")
-            continue
-        try:
-            with open(file_path, 'rb') as audio_file:
-                audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
-        except Exception as e:
-            file_results.append({'fileIndex': idx, 'status': 'failed', 'message': f'Failed to read: {e}', 'score': 0})
-            print(f"   Failed to read file: {e}\n")
-            continue
-        headers = {'Content-Type': 'application/json', 'x-api-key': api_key}
-        request_body = {'language': language, 'audioFormat': 'mp3', 'audioBase64': audio_base64}
-        try:
-            response = requests.post(endpoint_url, headers=headers, json=request_body, timeout=30)
-            if response.status_code != 200:
-                file_results.append({'fileIndex': idx, 'status': 'failed', 'message': f'HTTP {response.status_code}', 'score': 0})
-                print(f"   HTTP Status: {response.status_code}")
-                print(f"   Response: {response.text[:200]}\n")
-                continue
-            response_data = response.json()
-            if not isinstance(response_data, dict):
-                file_results.append({'fileIndex': idx, 'status': 'failed', 'message': 'Not a JSON object', 'score': 0})
-                print(f"   Invalid response type\n")
-                continue
-            response_status = response_data.get('status', '')
-            response_classification = response_data.get('classification', '')
-            confidence_score = response_data.get('confidenceScore', None)
-            if not response_status or not response_classification or confidence_score is None:
-                file_results.append({'fileIndex': idx, 'status': 'failed', 'message': 'Missing required fields', 'score': 0})
-                print(f"   Missing required fields")
-                print(f"   Response: {json.dumps(response_data, indent=2)[:200]}\n")
-                continue
-            if response_status != 'success':
-                file_results.append({'fileIndex': idx, 'status': 'failed', 'message': f'Status: {response_status}', 'score': 0})
-                print(f"   Status not 'success': {response_status}\n")
-                continue
-            if not isinstance(confidence_score, (int, float)) or confidence_score < 0 or confidence_score > 1:
-                file_results.append({'fileIndex': idx, 'status': 'failed', 'message': f'Invalid confidence: {confidence_score}', 'score': 0})
-                print(f"   Invalid confidence score: {confidence_score}\n")
-                continue
-            valid_classifications = ['HUMAN', 'AI_GENERATED']
-            if response_classification not in valid_classifications:
-                file_results.append({'fileIndex': idx, 'status': 'failed', 'message': f'Invalid classification: {response_classification}', 'score': 0})
-                print(f"   Invalid classification: {response_classification}\n")
-                continue
-            # Score calculation
-            file_score = 0
-            if response_classification == expected_classification:
-                if confidence_score >= 0.8:
-                    file_score = score_per_file
-                    confidence_tier = "100%"
-                elif confidence_score >= 0.6:
-                    file_score = score_per_file * 0.75
-                    confidence_tier = "75%"
-                elif confidence_score >= 0.4:
-                    file_score = score_per_file * 0.5
-                    confidence_tier = "50%"
-                else:
-                    file_score = score_per_file * 0.25
-                    confidence_tier = "25%"
-                total_score += file_score
-                file_results.append({'fileIndex': idx, 'status': 'success', 'matched': True, 'score': round(file_score, 2),
-                                     'actualClassification': response_classification, 'confidenceScore': confidence_score})
-                print(f"   CORRECT: {response_classification}")
-                print(f"   Confidence: {confidence_score:.2f} -> {confidence_tier} of points")
-                print(f"   Score: {file_score:.2f}/{score_per_file:.2f}\n")
-            else:
-                file_results.append({'fileIndex': idx, 'status': 'success', 'matched': False, 'score': 0,
-                                     'actualClassification': response_classification, 'confidenceScore': confidence_score})
-                print(f"   WRONG: {response_classification} (Expected: {expected_classification})")
-                print(f"   Score: 0/{score_per_file:.2f}\n")
-        except requests.exceptions.Timeout:
-            file_results.append({'fileIndex': idx, 'status': 'failed', 'message': 'Timeout (>30s)', 'score': 0})
-            print(f"   TIMEOUT: Request took longer than 30 seconds\n")
-        except requests.exceptions.ConnectionError:
-            file_results.append({'fileIndex': idx, 'status': 'failed', 'message': 'Connection error', 'score': 0})
-            print(f"   CONNECTION ERROR\n")
-        except Exception as e:
-            file_results.append({'fileIndex': idx, 'status': 'failed', 'message': str(e), 'score': 0})
-            print(f"   ERROR: {e}\n")
-    final_score = round(total_score)
-    print(f"{'='*60}")
-    print(f"EVALUATION SUMMARY")
-    print(f"{'='*60}")
-    print(f"Total Files Tested: {total_files}")
-    print(f"Final Score: {final_score}/100")
-    print(f"{'='*60}\n")
-    successful = sum(1 for r in file_results if r.get('matched', False))
-    failed = sum(1 for r in file_results if r['status'] == 'failed')
-    wrong = sum(1 for r in file_results if r['status'] == 'success' and not r.get('matched', False))
-    print(f"Correct Classifications: {successful}/{total_files}")
-    print(f"Wrong Classifications: {wrong}/{total_files}")
-    print(f"Failed/Errors: {failed}/{total_files}\n")
-    with open('evaluation_results.json', 'w') as f:
-        json.dump({'finalScore': final_score, 'totalFiles': total_files, 'scorePerFile': round(score_per_file, 2),
-                   'successfulClassifications': successful, 'wrongClassifications': wrong, 'failedTests': failed,
-                   'fileResults': file_results}, f, indent=2)
-    print(f"Detailed results saved to: evaluation_results.json\n")
-    return True
-if __name__ == '__main__':
-    ENDPOINT_URL = 'https://shivam-2211-voice-detection-api.hf.space/api/voice-detection'
-    API_KEY = 'sk_test_voice_detection_2026'
-    DIR = r'c:\Users\shiva\OneDrive\Desktop\Voice Project\voice-detection-api\drive-download-20260216T053632Z-1-001'
-    TEST_FILES = [
-        {'language': 'English', 'file_path': f'{DIR}\\English_voice_AI_GENERATED.mp3', 'expected_classification': 'AI_GENERATED'},
-        {'language': 'Hindi',   'file_path': f'{DIR}\\Hindi_Voice_HUMAN.mp3',          'expected_classification': 'HUMAN'},
-        {'language': 'Malayalam','file_path': f'{DIR}\\Malayalam_AI_GENERATED.mp3',     'expected_classification': 'AI_GENERATED'},
-        {'language': 'Tamil',   'file_path': f'{DIR}\\TAMIL_VOICE__HUMAN.mp3',         'expected_classification': 'HUMAN'},
-        {'language': 'Telugu',  'file_path': f'{DIR}\\Telugu_Voice_AI_GENERATED.mp3',  'expected_classification': 'AI_GENERATED'},
-    ]
-    evaluate_voice_detection_api(ENDPOINT_URL, API_KEY, TEST_FILES)