Spaces:

amfafa
/

voice_authentication-sys

Running

App Files Files Community

amfafa committed 15 days ago

Commit

7bd4461

verified ·

1 Parent(s): b659e09

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -327

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
-import io
 import json
 import math
 import time
@@ -10,13 +9,9 @@ import torchaudio
 import numpy as np
 import random
 import tempfile
-import base64
 import gradio as gr
 from datetime import datetime, timedelta
-# TORCHAUDIO COMPATIBILITY FIX
 if not hasattr(torchaudio, 'list_audio_backends'):
     torchaudio.list_audio_backends = lambda: ["soundfile"]
@@ -39,7 +34,6 @@ MAX_ATTEMPTS = 3
 LOCKOUT_MINUTES = 5
 COOLDOWN_SECONDS = 3
 ANTISPOOFING_THRESHOLD = 0.02
-LIVE_AUDIO_THRESHOLD = 0.5
@@ -97,15 +91,81 @@ for param in base_model.parameters():
 print("Loading AAM-Softmax checkpoint...")
 ckpt = torch.load(CKPT_PATH, map_location=DEVICE)
-num_classes = ckpt.get('num_classes', 227)
 classifier = SpeakerClassifier(input_dim=768, hidden_dim=512, num_classes=num_classes).to(DEVICE)
-classifier.load_state_dict(ckpt['classifier_state'])
 classifier.eval()
-print(f"Models loaded. Speakers trained on: {num_classes}")
-# DATABASE MANAGEMENT
 def load_db():
     if os.path.exists(DB_PATH):
@@ -113,7 +173,6 @@ def load_db():
             return json.load(f)
     return {}
 def save_db(db):
     """Write the user database to DB_PATH as indented JSON.

     Args:
         db: The database object to serialise; it is passed to json.dump
             unchanged.

     Values the json module cannot encode natively are converted with
     ``str`` (``default=str``).
     """
     # Mode 'w' truncates any existing file at DB_PATH before writing.
     with open(DB_PATH, 'w') as f:
         json.dump(db, f, indent=2, default=str)
@@ -123,7 +182,6 @@ def save_db(db):
 # AUDIO PROCESSING
 def load_audio(audio_input):
-    """Load audio from file path, tuple (sr, numpy), or bytes."""
     if isinstance(audio_input, tuple):
         sr, audio_np = audio_input
         wav = torch.tensor(audio_np, dtype=torch.float32)
@@ -132,7 +190,6 @@ def load_audio(audio_input):
         if wav.shape[0] > 1:
             wav = wav.mean(dim=0, keepdim=True)
         wav = wav.squeeze(0)
-        # Normalize int audio to float
         if wav.abs().max() > 1.0:
             wav = wav / 32768.0
         if sr != SAMPLE_RATE:
@@ -158,17 +215,13 @@ def load_audio(audio_input):
     else:
         raise ValueError(f"Unsupported audio input type: {type(audio_input)}")
-    # Pad or trim to MAX_LEN
     if wav.shape[0] > MAX_LEN:
         wav = wav[:MAX_LEN]
     elif wav.shape[0] < MAX_LEN:
         wav = F.pad(wav, (0, MAX_LEN - wav.shape[0]))
     return wav
 def extract_embedding(wav_tensor):
-    """Extract 512-dim speaker embedding from audio tensor."""
     with torch.no_grad():
         wav = wav_tensor.unsqueeze(0).to(DEVICE)
         outputs = base_model(wav)
@@ -177,123 +230,87 @@ def extract_embedding(wav_tensor):
         embedding = F.normalize(embedding, p=2, dim=1)
     return embedding.squeeze(0).cpu().numpy()
 def add_noise(wav_tensor, noise_level=0.005):
-    """Add Gaussian noise for data augmentation."""
     noise = torch.randn_like(wav_tensor) * noise_level
     return wav_tensor + noise
 # LIVENESS DETECTION
 def check_liveness(wav_tensor):
-    """Basic liveness check — detects silence or suspicious patterns."""
     wav_np = wav_tensor.numpy()
-    # Check if audio has enough energy (not silent)
     rms = np.sqrt(np.mean(wav_np ** 2))
     if rms < 0.001:
         return False, "Audio too quiet — possible silence or empty recording"
-    # Check for sufficient variation (not a constant tone)
     std = np.std(wav_np)
     if std < 0.001:
         return False, "Audio lacks variation — possible synthetic tone"
-    # Check zero-crossing rate (natural speech has moderate ZCR)
     zero_crossings = np.sum(np.abs(np.diff(np.sign(wav_np)))) / (2 * len(wav_np))
     if zero_crossings < 0.01:
         return False, "Abnormal audio pattern — possible replay attack"
-    # Check audio duration has content
     non_silent = np.abs(wav_np) > 0.01
     speech_ratio = np.sum(non_silent) / len(wav_np)
     if speech_ratio < 0.1:
         return False, "Insufficient speech content detected"
     return True, "Liveness check passed"
-# ANTISPOOFING CHECK
 def check_antispoofing(wav_tensor):
-    """Basic antispoofing — checks spectral characteristics."""
     wav_np = wav_tensor.numpy()
-    # Check spectral flatness (natural speech vs synthetic)
     fft = np.fft.rfft(wav_np)
     magnitude = np.abs(fft)
     magnitude = magnitude[magnitude > 0]
     if len(magnitude) == 0:
         return False, "No frequency content detected"
     geometric_mean = np.exp(np.mean(np.log(magnitude + 1e-10)))
     arithmetic_mean = np.mean(magnitude)
     spectral_flatness = geometric_mean / (arithmetic_mean + 1e-10)
     if spectral_flatness > (1.0 - ANTISPOOFING_THRESHOLD):
         return False, f"Spectral flatness too high ({spectral_flatness:.4f}) — possible synthetic audio"
-    # Check for unnaturally uniform amplitude
-    frame_size = 1600  # 100ms frames
     if len(wav_np) >= frame_size * 3:
         frames = [wav_np[i:i + frame_size] for i in range(0, len(wav_np) - frame_size, frame_size)]
         frame_energies = [np.sqrt(np.mean(f ** 2)) for f in frames]
         energy_std = np.std(frame_energies)
         if energy_std < 0.001:
             return False, "Unnaturally uniform energy — possible synthetic audio"
     return True, "Antispoofing check passed"
-# SECURITY: LOCKOUT & COOLDOWN
-attempt_tracker = {}  # {user_id: {"count": int, "last_attempt": datetime, "locked_until": datetime}}
 def check_security(user_id):
-    """Check if user is locked out or in cooldown."""
     now = datetime.now()
     if user_id not in attempt_tracker:
         return True, "OK"
     tracker = attempt_tracker[user_id]
-    # Check lockout
     if "locked_until" in tracker and tracker["locked_until"]:
         locked_until = datetime.fromisoformat(tracker["locked_until"])
         if now < locked_until:
             remaining = (locked_until - now).seconds
             return False, f"Account locked. Try again in {remaining} seconds."
         else:
-            # Lockout expired — reset
             tracker["count"] = 0
             tracker["locked_until"] = None
-    # Check cooldown
     if "last_attempt" in tracker and tracker["last_attempt"]:
         last = datetime.fromisoformat(tracker["last_attempt"])
         elapsed = (now - last).total_seconds()
         if elapsed < COOLDOWN_SECONDS:
             return False, f"Please wait {COOLDOWN_SECONDS - int(elapsed)} seconds before trying again."
     return True, "OK"
 def record_attempt(user_id, success):
-    """Record a verification attempt."""
     now = datetime.now()
     if user_id not in attempt_tracker:
         attempt_tracker[user_id] = {"count": 0, "last_attempt": None, "locked_until": None}
     tracker = attempt_tracker[user_id]
     tracker["last_attempt"] = now.isoformat()
     if success:
         tracker["count"] = 0
         tracker["locked_until"] = None
@@ -304,10 +321,9 @@ def record_attempt(user_id, success):
-# CORE FUNCTIONS: ENROLL & VERIFY
 def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=NUM_CLEAN_SAMPLES):
-    """Process a single enrollment sample. Collects NUM_CLEAN_SAMPLES then finalizes."""
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if not full_name or not full_name.strip():
@@ -321,20 +337,16 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
     try:
         wav = load_audio(audio_input)
-        # Liveness check
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             return f"Enrollment failed: {live_msg}"
-        # Antispoofing check
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             return f"Enrollment failed: {spoof_msg}"
-        # Extract clean embedding
         clean_emb = extract_embedding(wav)
-        # Generate noisy augmented embeddings
         noisy_embeddings = []
         for i in range(NUM_NOISY_COPIES):
             noise_level = 0.003 + (i * 0.002)
@@ -342,7 +354,6 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
             noisy_emb = extract_embedding(noisy_wav)
             noisy_embeddings.append(noisy_emb)
-        # Load DB and accumulate samples
         db = load_db()
         if user_id not in db:
@@ -355,7 +366,6 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
                 "samples_collected": 0
             }
-        # Store this sample's embeddings (1 clean + 4 noisy = 5 per sample)
         sample_data = {
             "clean": clean_emb.tolist(),
             "noisy": [e.tolist() for e in noisy_embeddings]
@@ -367,7 +377,6 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
         samples_collected = db[user_id]["samples_collected"]
         if samples_collected >= total_samples:
-            # Finalize: average all embeddings (6 clean + 24 noisy = 30 total)
             all_embeddings = []
             for sample in db[user_id]["sample_embeddings"]:
                 all_embeddings.append(np.array(sample["clean"]))
@@ -380,7 +389,6 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
             db[user_id]["voiceprint"] = avg_embedding.tolist()
             db[user_id]["status"] = "enrolled"
             db[user_id]["completed_at"] = datetime.now().isoformat()
-            # Remove raw samples to save space
             db[user_id]["sample_embeddings"] = []
             save_db(db)
@@ -394,8 +402,9 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
         return f"Enrollment error: {str(e)}"
 def verify_speaker(audio_input, user_id):
-    """Verify a speaker against their stored voiceprint."""
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if audio_input is None:
@@ -403,12 +412,10 @@ def verify_speaker(audio_input, user_id):
     user_id = user_id.strip().upper()
-    # Security check
     allowed, sec_msg = check_security(user_id)
     if not allowed:
         return f"ACCESS DENIED: {sec_msg}"
-    # Check user exists
     db = load_db()
     if user_id not in db:
         return f"Error: User '{user_id}' not found. Please enroll first."
@@ -421,19 +428,16 @@ def verify_speaker(audio_input, user_id):
     try:
         wav = load_audio(audio_input)
-        # Liveness check
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(user_id, False)
             return f"ACCESS DENIED: {live_msg}"
-        # Antispoofing check
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(user_id, False)
             return f"ACCESS DENIED: {spoof_msg}"
-        # Extract embedding and compare
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[user_id]["voiceprint"])
@@ -469,12 +473,13 @@ def verify_speaker(audio_input, user_id):
         return f"Verification error: {str(e)}"
 def list_users():
-    """List all enrolled users."""
     db = load_db()
     if not db:
         return "No users enrolled yet."
     lines = ["=== Enrolled Users ===\n"]
     for uid, data in db.items():
         name = data.get("full_name", "Unknown")
@@ -484,34 +489,23 @@ def list_users():
         lines.append(f"ID: {uid} | Name: {name} | Status: {status} | Samples: {samples} | Enrolled: {enrolled}")
     return "\n".join(lines)
 def delete_user(user_id):
-    """Delete a user's voiceprint."""
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     user_id = user_id.strip().upper()
     db = load_db()
     if user_id not in db:
         return f"Error: User '{user_id}' not found."
     name = db[user_id].get("full_name", user_id)
     del db[user_id]
     save_db(db)
-    # Clear attempt tracker too
     if user_id in attempt_tracker:
         del attempt_tracker[user_id]
     return f"User '{name}' ({user_id}) deleted successfully."
 def reset_lockout(user_id):
-    """Reset lockout for a user."""
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     user_id = user_id.strip().upper()
     if user_id in attempt_tracker:
         attempt_tracker[user_id] = {"count": 0, "last_attempt": None, "locked_until": None}
@@ -522,84 +516,53 @@ def reset_lockout(user_id):
 # GRADIO INTERFACE
-with gr.Blocks(
-    title="ATM Voice Authentication System",
-    theme=gr.themes.Soft()
-) as demo:
-    gr.Markdown(
-        """
-        # ATM Voice Authentication System
-        ### Voice-Based Speaker Verification for Banking Security
-        **Model:** UniSpeech-SAT + AAM-Softmax | **EER:** 3.94% | **Speakers Trained:** 227 Akan speakers
-        """
-    )
     with gr.Tabs():
-        # ---- ENROLL TAB ----
         with gr.Tab("Enroll"):
-            gr.Markdown(
-                """
-                ### Enroll New User
-                Record **6 voice samples** to create your voiceprint. Speak naturally for 3-4 seconds each time.
-                The system adds noise augmentation automatically (6 clean + 24 noisy = 30 embeddings averaged).
-                """
-            )
             with gr.Row():
                 with gr.Column():
-                    enroll_audio = gr.Audio(
-                        label="Record Voice Sample",
-                        sources=["microphone"],
-                        type="numpy"
-                    )
                     enroll_user_id = gr.Textbox(label="User ID (e.g., ATM_001)", placeholder="ATM_001")
                     enroll_name = gr.Textbox(label="Full Name", placeholder="Jochebed Fafa")
                     enroll_sample_num = gr.Number(label="Sample Number (1-6)", value=1, minimum=1, maximum=6, step=1)
                     enroll_btn = gr.Button("Enroll Sample", variant="primary")
                 with gr.Column():
                     enroll_result = gr.Textbox(label="Result", lines=4, interactive=False)
-            enroll_btn.click(
-                fn=enroll_sample,
-                inputs=[enroll_audio, enroll_user_id, enroll_name, enroll_sample_num],
-                outputs=enroll_result
-            )
-        # ---- VERIFY TAB ----
         with gr.Tab("Verify"):
-            gr.Markdown(
-                """
-                ### Verify Identity
-                Record your voice to verify against your enrolled voiceprint.
-                Security: 3 failed attempts = 5-minute lockout. 3-second cooldown between attempts.
-                """
-            )
             with gr.Row():
                 with gr.Column():
-                    verify_audio = gr.Audio(
-                        label="Record Voice",
-                        sources=["microphone"],
-                        type="numpy"
-                    )
                     verify_user_id = gr.Textbox(label="User ID", placeholder="ATM_001")
                     verify_btn = gr.Button("Verify", variant="primary")
                 with gr.Column():
                     verify_result = gr.Textbox(label="Result", lines=6, interactive=False)
-            verify_btn.click(
-                fn=verify_speaker,
-                inputs=[verify_audio, verify_user_id],
-                outputs=verify_result
-            )
-        # ---- MANAGE USERS TAB ----
         with gr.Tab("Users"):
             gr.Markdown("### Manage Enrolled Users")
             list_btn = gr.Button("List All Users")
             users_output = gr.Textbox(label="Enrolled Users", lines=10, interactive=False)
             list_btn.click(fn=list_users, outputs=users_output)
             gr.Markdown("---")
             with gr.Row():
                 with gr.Column():
@@ -607,125 +570,66 @@ with gr.Blocks(
                     del_btn = gr.Button("Delete User", variant="stop")
                     del_result = gr.Textbox(label="Result", interactive=False)
                     del_btn.click(fn=delete_user, inputs=del_user_id, outputs=del_result)
                 with gr.Column():
                     reset_user_id = gr.Textbox(label="User ID to Reset Lockout", placeholder="ATM_001")
                     reset_btn = gr.Button("Reset Lockout", variant="secondary")
                     reset_result = gr.Textbox(label="Result", interactive=False)
                     reset_btn.click(fn=reset_lockout, inputs=reset_user_id, outputs=reset_result)
-        # ---- API DOCS TAB ----
         with gr.Tab("API Docs"):
-            gr.Markdown(
-                """
-                ### REST API Endpoints for Banking Systems
-                **Base URL:** `https://amfafa-voice-authentication-sys.hf.space`
-                ---
-                #### 1. Enroll a Voice Sample
-                ```
-                POST /api/enroll
-                Content-Type: multipart/form-data
-                Fields:
-                  - audio: WAV file (required)
-                  - user_id: string (required)
-                  - full_name: string (required)
-                ```
-                **Response:**
-                ```json
-                {
-                  "success": true,
-                  "message": "Sample 1/6 recorded...",
-                  "user_id": "ATM_001",
-                  "samples_collected": 1,
-                  "samples_required": 6,
-                  "enrollment_complete": false
-                }
-                ```
-                ---
-                #### 2. Verify a Speaker
-                ```
-                POST /api/verify
-                Content-Type: multipart/form-data
-                Fields:
-                  - audio: WAV file (required)
-                  - user_id: string (required)
-                ```
-                **Response:**
-                ```json
-                {
-                  "success": true,
-                  "access_granted": true,
-                  "user_id": "ATM_001",
-                  "full_name": "Jochebed Fafa",
-                  "similarity": 0.4521,
-                  "threshold": 0.35,
-                  "liveness_passed": true,
-                  "antispoofing_passed": true
-                }
-                ```
-                ---
-                #### 3. List Enrolled Users
-                ```
-                GET /api/users
-                ```
-                ---
-                #### 4. Delete a User
-                ```
-                DELETE /api/users/{user_id}
-                ```
-                ---
-                #### 5. Health Check
-                ```
-                GET /api/health
-                ```
-                """
-            )
-# FASTAPI REST API ENDPOINTS
-app = gr.mount_gradio_app(gr.routes.App(), demo, path="/")
-# We use Gradio's underlying FastAPI app to add REST endpoints
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException
-from fastapi.responses import JSONResponse
-from fastapi.middleware.cors import CORSMiddleware
-# Get the FastAPI app from Gradio
-app = demo.app if hasattr(demo, 'app') else None
-# Since Gradio 4.x manages its own FastAPI app, we create custom endpoints
-# by using Gradio's built-in Blocks.launch() with app_kwargs or by
-# adding routes after launch. For Hugging Face Spaces, we use a different approach:
-# We create a FastAPI app, mount Gradio on it, and add our REST routes.
-from fastapi import FastAPI
-api_app = FastAPI(
-    title="ATM Voice Authentication API",
-    description="Voice-Based Speaker Verification System for Banking",
-    version="1.0.0"
-)
-# CORS — allow mobile app and banking systems to connect
-api_app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_credentials=True,
@@ -733,8 +637,7 @@ api_app.add_middleware(
     allow_headers=["*"],
 )
-@api_app.get("/api/health")
 async def health_check():
     return {
         "status": "healthy",
@@ -745,29 +648,19 @@ async def health_check():
         "timestamp": datetime.now().isoformat()
     }
-@api_app.post("/api/enroll")
-async def api_enroll(
-    audio: UploadFile = File(...),
-    user_id: str = Form(...),
-    full_name: str = Form(...)
-):
     try:
         audio_bytes = await audio.read()
-        # Save to temp file
         with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
             tmp.write(audio_bytes)
             tmp_path = tmp.name
         result = enroll_sample(tmp_path, user_id, full_name, 1)
         os.unlink(tmp_path)
         db = load_db()
         uid = user_id.strip().upper()
         samples_collected = db.get(uid, {}).get("samples_collected", 0)
         is_complete = db.get(uid, {}).get("status") == "enrolled"
         return JSONResponse(content={
             "success": "error" not in result.lower() and "failed" not in result.lower(),
             "message": result,
@@ -776,93 +669,49 @@ async def api_enroll(
             "samples_required": NUM_CLEAN_SAMPLES,
             "enrollment_complete": is_complete
         })
     except Exception as e:
-        return JSONResponse(
-            status_code=500,
-            content={"success": False, "message": f"Server error: {str(e)}"}
-        )
-@api_app.post("/api/verify")
-async def api_verify(
-    audio: UploadFile = File(...),
-    user_id: str = Form(...)
-):
     try:
         audio_bytes = await audio.read()
         with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
             tmp.write(audio_bytes)
             tmp_path = tmp.name
-        # Run verification
         uid = user_id.strip().upper()
-        # Security check first
         allowed, sec_msg = check_security(uid)
         if not allowed:
             os.unlink(tmp_path)
-            return JSONResponse(content={
-                "success": True,
-                "access_granted": False,
-                "user_id": uid,
-                "message": sec_msg,
-                "locked": True
-            })
-        # Check user exists
         db = load_db()
         if uid not in db:
             os.unlink(tmp_path)
-            return JSONResponse(content={
-                "success": False,
-                "message": f"User '{uid}' not found. Please enroll first."
-            })
         if db[uid].get("status") != "enrolled":
             os.unlink(tmp_path)
             samples = db[uid].get("samples_collected", 0)
-            return JSONResponse(content={
-                "success": False,
-                "message": f"Enrollment incomplete. {NUM_CLEAN_SAMPLES - samples} more sample(s) needed."
-            })
         wav = load_audio(tmp_path)
         os.unlink(tmp_path)
-        # Liveness
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(uid, False)
-            return JSONResponse(content={
-                "success": True,
-                "access_granted": False,
-                "user_id": uid,
-                "message": live_msg,
-                "liveness_passed": False,
-                "antispoofing_passed": None
-            })
-        # Antispoofing
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(uid, False)
-            return JSONResponse(content={
-                "success": True,
-                "access_granted": False,
-                "user_id": uid,
-                "message": spoof_msg,
-                "liveness_passed": True,
-                "antispoofing_passed": False
-            })
-        # Embedding comparison
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[uid]["voiceprint"])
-        similarity = float(np.dot(test_emb, stored_emb) / (
-            np.linalg.norm(test_emb) * np.linalg.norm(stored_emb) + 1e-10
-        ))
         granted = similarity >= THRESHOLD
         record_attempt(uid, granted)
@@ -893,15 +742,10 @@ async def api_verify(
                 response["message"] = f"Account locked for {LOCKOUT_MINUTES} minutes."
         return JSONResponse(content=response)
     except Exception as e:
-        return JSONResponse(
-            status_code=500,
-            content={"success": False, "message": f"Server error: {str(e)}"}
-        )
-@api_app.get("/api/users")
 async def api_list_users():
     db = load_db()
     users = []
@@ -916,29 +760,20 @@ async def api_list_users():
         })
     return JSONResponse(content={"success": True, "users": users, "total": len(users)})
-@api_app.delete("/api/users/{user_id}")
 async def api_delete_user(user_id: str):
     """Delete a user through delete_user() and report the outcome as JSON.

     Args:
         user_id: Identifier of the user to delete; validation and
             normalisation are left to delete_user().

     Returns:
         A JSONResponse with ``success`` (bool) and ``message`` (the text
         returned by delete_user()).
     """
     result = delete_user(user_id)
     # delete_user() starts each failure message with "Error". Searching for
     # the substring "error" anywhere would flag a successful delete as failed
     # whenever the user's name or ID happens to contain that word.
     success = not result.startswith("Error")
     return JSONResponse(content={"success": success, "message": result})
-@api_app.post("/api/reset-lockout")
 async def api_reset_lockout(user_id: str = Form(...)):
     """Reset the lockout state for a user and report the outcome as JSON.

     Args:
         user_id: Form field naming the user whose lockout is cleared.

     Returns:
         A JSONResponse with ``success`` (bool) and ``message`` (the text
         returned by reset_lockout()).
     """
     result = reset_lockout(user_id)
     # reset_lockout() answers a blank ID with an "Error: ..." message, so the
     # success flag must follow the message instead of always being True.
     success = not result.startswith("Error")
     return JSONResponse(content={"success": success, "message": result})
-# MOUNT GRADIO ON FASTAPI
-app = gr.mount_gradio_app(api_app, demo, path="/")
 # LAUNCH
 if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import os
 import json
 import math
 import time
 import numpy as np
 import random
 import tempfile
 import gradio as gr
 from datetime import datetime, timedelta
 if not hasattr(torchaudio, 'list_audio_backends'):
     torchaudio.list_audio_backends = lambda: ["soundfile"]
 LOCKOUT_MINUTES = 5
 COOLDOWN_SECONDS = 3
 ANTISPOOFING_THRESHOLD = 0.02
 print("Loading AAM-Softmax checkpoint...")
 # NOTE(review): torch.load unpickles arbitrary Python objects. CKPT_PATH must
 # only ever point at a trusted checkpoint file.
 ckpt = torch.load(CKPT_PATH, map_location=DEVICE)
 # Auto-detect checkpoint format: print what was loaded so a mismatch between
 # the training script and this app shows up in the startup log.
 print(f"Checkpoint type: {type(ckpt)}")
 if isinstance(ckpt, dict):
     print(f"Checkpoint keys: {list(ckpt.keys())}")
 # Detect num_classes from checkpoint; 227 is the fallback when the checkpoint
 # records neither 'num_classes' nor 'num_speakers'.
 num_classes = 227
 if isinstance(ckpt, dict):
     if 'num_classes' in ckpt:
         num_classes = ckpt['num_classes']
     elif 'num_speakers' in ckpt:
         num_classes = ckpt['num_speakers']
 # Build classifier
 classifier = SpeakerClassifier(input_dim=768, hidden_dim=512, num_classes=num_classes).to(DEVICE)
 # Load weights - try every known checkpoint layout in turn.
 loaded = False
 if isinstance(ckpt, dict):
     # Try common key names for classifier state
     for key in ['classifier_state', 'classifier_state_dict', 'model_state_dict', 'state_dict', 'model']:
         if key in ckpt:
             try:
                 classifier.load_state_dict(ckpt[key])
                 print(f"Loaded classifier from key: '{key}'")
                 loaded = True
                 break
             except Exception as e:
                 print(f"Key '{key}' found but failed to load: {e}")
     # If no named key worked, try loading the dict directly (maybe ckpt IS the state_dict)
     if not loaded:
         # Parameter names contain dots (e.g. 'fc1.weight'). Guard with
         # isinstance so a checkpoint with non-string keys cannot raise here.
         sample_keys = list(ckpt.keys())[:5]
         looks_like_state_dict = any(isinstance(k, str) and '.' in k for k in sample_keys)
         if looks_like_state_dict:
             try:
                 classifier.load_state_dict(ckpt)
                 print("Loaded classifier directly from checkpoint dict (it IS the state_dict)")
                 loaded = True
             except Exception as e:
                 print(f"Direct load failed: {e}")
                 # Try with strict=False
                 try:
                     # strict=False does not raise on key-name mismatches, so a
                     # checkpoint sharing no parameter names with the classifier
                     # would silently load nothing. Count it as loaded only
                     # when at least one tensor actually matched.
                     expected_keys = set(classifier.state_dict().keys())
                     load_result = classifier.load_state_dict(ckpt, strict=False)
                     matched_keys = expected_keys - set(load_result.missing_keys)
                     if matched_keys:
                         print("Loaded classifier with strict=False")
                         print(f"Matched {len(matched_keys)}/{len(expected_keys)} classifier tensors")
                         loaded = True
                     else:
                         print("Strict=False matched no classifier parameters")
                 except Exception as e2:
                     print(f"Strict=False also failed: {e2}")
     # Try loading base_model state too if present
     if 'base_model_state' in ckpt:
         try:
             base_model.load_state_dict(ckpt['base_model_state'], strict=False)
             print("Also loaded fine-tuned base model weights")
         except Exception as e:
             print(f"Base model load skipped: {e}")
 elif isinstance(ckpt, nn.Module):
     # Checkpoint is the model itself
     classifier = ckpt.to(DEVICE)
     print("Loaded classifier directly (checkpoint is model object)")
     loaded = True
 if not loaded:
     print("WARNING: Could not load classifier weights. Using random initialization.")
     print("The system will still run but verification accuracy will be poor.")
 classifier.eval()
 print(f"Models ready. num_classes={num_classes}, loaded={loaded}")
+# DATABASE
 def load_db():
     if os.path.exists(DB_PATH):
             return json.load(f)
     return {}
 def save_db(db):
     """Atomically write the user database to DB_PATH as indented JSON.

     The data is serialised to a temporary file in the same directory and then
     moved over DB_PATH with os.replace, so a failure part-way through the
     write cannot leave a truncated or half-written database behind.

     Args:
         db: The database object to serialise. Values the json module cannot
             encode natively are converted with ``str`` (``default=str``).

     Raises:
         OSError: If the temporary file cannot be created, written or moved.
     """
     target_dir = os.path.dirname(os.path.abspath(DB_PATH))
     # The temp file must be in the same directory as the target, otherwise
     # os.replace could cross filesystems and would no longer be atomic.
     fd, tmp_path = tempfile.mkstemp(dir=target_dir, suffix='.tmp')
     try:
         with os.fdopen(fd, 'w') as f:
             json.dump(db, f, indent=2, default=str)
         os.replace(tmp_path, DB_PATH)
     except BaseException:
         # Do not leave stray temp files behind when the write fails.
         if os.path.exists(tmp_path):
             os.unlink(tmp_path)
         raise
 # AUDIO PROCESSING
 def load_audio(audio_input):
     if isinstance(audio_input, tuple):
         sr, audio_np = audio_input
         wav = torch.tensor(audio_np, dtype=torch.float32)
         if wav.shape[0] > 1:
             wav = wav.mean(dim=0, keepdim=True)
         wav = wav.squeeze(0)
         if wav.abs().max() > 1.0:
             wav = wav / 32768.0
         if sr != SAMPLE_RATE:
     else:
         raise ValueError(f"Unsupported audio input type: {type(audio_input)}")
     if wav.shape[0] > MAX_LEN:
         wav = wav[:MAX_LEN]
     elif wav.shape[0] < MAX_LEN:
         wav = F.pad(wav, (0, MAX_LEN - wav.shape[0]))
     return wav
 def extract_embedding(wav_tensor):
     with torch.no_grad():
         wav = wav_tensor.unsqueeze(0).to(DEVICE)
         outputs = base_model(wav)
         embedding = F.normalize(embedding, p=2, dim=1)
     return embedding.squeeze(0).cpu().numpy()
 def add_noise(wav_tensor, noise_level=0.005):
     """Return wav_tensor with scaled standard-normal noise added.

     Args:
         wav_tensor: Tensor to perturb; it is not modified in place.
         noise_level: Multiplier applied to the standard-normal noise.

     Returns:
         A new tensor equal to ``wav_tensor`` plus the scaled noise.
     """
     perturbation = torch.randn_like(wav_tensor)
     perturbation = perturbation * noise_level
     return wav_tensor + perturbation
 # LIVENESS DETECTION
 def check_liveness(wav_tensor):
     """Run basic signal-level liveness heuristics on a waveform.

     The checks run in order: overall RMS energy, standard deviation,
     zero-crossing rate, and the share of samples whose magnitude exceeds 0.01.
     The first failing check decides the result.

     Args:
         wav_tensor: Torch tensor holding the waveform samples.

     Returns:
         A ``(passed, message)`` tuple: ``passed`` is a bool and ``message``
         names the failing check or confirms success.
     """
     # detach()/cpu() make the conversion work for tensors that track gradients
     # or live on an accelerator; a bare .numpy() raises for those.
     wav_np = wav_tensor.detach().cpu().numpy()
     # With zero samples every statistic below is NaN, all "<" comparisons are
     # False, and the empty recording would be reported as live.
     if wav_np.size == 0:
         return False, "Audio too quiet — possible silence or empty recording"
     rms = np.sqrt(np.mean(wav_np ** 2))
     if rms < 0.001:
         return False, "Audio too quiet — possible silence or empty recording"
     std = np.std(wav_np)
     if std < 0.001:
         return False, "Audio lacks variation — possible synthetic tone"
     # Each sign flip contributes 2 to |diff(sign)|, hence the division by 2.
     zero_crossings = np.sum(np.abs(np.diff(np.sign(wav_np)))) / (2 * len(wav_np))
     if zero_crossings < 0.01:
         return False, "Abnormal audio pattern — possible replay attack"
     non_silent = np.abs(wav_np) > 0.01
     speech_ratio = np.sum(non_silent) / len(wav_np)
     if speech_ratio < 0.1:
         return False, "Insufficient speech content detected"
     return True, "Liveness check passed"
+# ANTISPOOFING
 def check_antispoofing(wav_tensor):
     """Run spectral-flatness and frame-energy heuristics on a waveform.

     The audio is rejected when its spectral flatness (geometric mean over
     arithmetic mean of the non-zero FFT magnitudes) exceeds
     ``1.0 - ANTISPOOFING_THRESHOLD``, or when the RMS energy of consecutive
     1600-sample frames is almost constant.

     Args:
         wav_tensor: Torch tensor holding the waveform samples.

     Returns:
         A ``(passed, message)`` tuple: ``passed`` is a bool and ``message``
         names the failing check or confirms success.
     """
     # detach()/cpu() make the conversion work for tensors that track gradients
     # or live on an accelerator; a bare .numpy() raises for those.
     wav_np = wav_tensor.detach().cpu().numpy()
     # np.fft.rfft raises on zero-length input; report it like silent audio.
     if wav_np.size == 0:
         return False, "No frequency content detected"
     fft = np.fft.rfft(wav_np)
     magnitude = np.abs(fft)
     magnitude = magnitude[magnitude > 0]
     if len(magnitude) == 0:
         return False, "No frequency content detected"
     geometric_mean = np.exp(np.mean(np.log(magnitude + 1e-10)))
     arithmetic_mean = np.mean(magnitude)
     spectral_flatness = geometric_mean / (arithmetic_mean + 1e-10)
     if spectral_flatness > (1.0 - ANTISPOOFING_THRESHOLD):
         return False, f"Spectral flatness too high ({spectral_flatness:.4f}) — possible synthetic audio"
     frame_size = 1600
     if len(wav_np) >= frame_size * 3:
         # "+ 1" keeps the last full frame when the length is an exact multiple
         # of frame_size; without it that frame was silently dropped.
         frame_starts = range(0, len(wav_np) - frame_size + 1, frame_size)
         frames = [wav_np[i:i + frame_size] for i in frame_starts]
         frame_energies = [np.sqrt(np.mean(f ** 2)) for f in frames]
         energy_std = np.std(frame_energies)
         if energy_std < 0.001:
             return False, "Unnaturally uniform energy — possible synthetic audio"
     return True, "Antispoofing check passed"
+# SECURITY: LOCKOUT & COOLDOWN
+attempt_tracker = {}
 def check_security(user_id):
     """Decide whether a verification attempt is currently allowed for a user.

     Looks up the user's entry in the module-level ``attempt_tracker`` and
     applies, in order, the lockout check and the cooldown check. An expired
     lockout is cleared (count reset to 0, ``locked_until`` set to None) as a
     side effect.

     Args:
         user_id: Key into ``attempt_tracker``.

     Returns:
         ``(True, "OK")`` when the attempt may proceed, otherwise
         ``(False, reason)`` with a user-facing explanation.
     """
     now = datetime.now()
     tracker = attempt_tracker.get(user_id)
     if tracker is None:
         # No recorded attempts yet: nothing to enforce.
         return True, "OK"
     locked_raw = tracker.get("locked_until")
     if locked_raw:
         locked_until = datetime.fromisoformat(locked_raw)
         if now < locked_until:
             # total_seconds() covers the whole interval; the .seconds
             # attribute drops whole days and would under-report long lockouts.
             remaining = int((locked_until - now).total_seconds())
             return False, f"Account locked. Try again in {remaining} seconds."
         # Lockout has expired: clear it so the user starts fresh.
         tracker["count"] = 0
         tracker["locked_until"] = None
     last_raw = tracker.get("last_attempt")
     if last_raw:
         last = datetime.fromisoformat(last_raw)
         elapsed = (now - last).total_seconds()
         if elapsed < COOLDOWN_SECONDS:
             return False, f"Please wait {COOLDOWN_SECONDS - int(elapsed)} seconds before trying again."
     return True, "OK"
 def record_attempt(user_id, success):
     """Record a verification attempt for ``user_id`` in ``attempt_tracker``.

     Creates the tracker entry on first use, stamps ``last_attempt`` with the
     current time (ISO-8601 string), and on success clears the failure count
     and any lockout.

     NOTE(review): only the success branch is visible here; failure counting
     and lockout are presumably handled in lines not shown — confirm.
     """
     now = datetime.now()
     if user_id not in attempt_tracker:
         # First attempt seen for this user: start with a clean record.
         attempt_tracker[user_id] = {"count": 0, "last_attempt": None, "locked_until": None}
     tracker = attempt_tracker[user_id]
     # Stored as an ISO string; check_security parses it with fromisoformat.
     tracker["last_attempt"] = now.isoformat()
     if success:
         # A successful verification wipes the failure count and any lock.
         tracker["count"] = 0
         tracker["locked_until"] = None
+# ENROLL
 def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=NUM_CLEAN_SAMPLES):
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if not full_name or not full_name.strip():
     try:
         wav = load_audio(audio_input)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             return f"Enrollment failed: {live_msg}"
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             return f"Enrollment failed: {spoof_msg}"
         clean_emb = extract_embedding(wav)
         noisy_embeddings = []
         for i in range(NUM_NOISY_COPIES):
             noise_level = 0.003 + (i * 0.002)
             noisy_emb = extract_embedding(noisy_wav)
             noisy_embeddings.append(noisy_emb)
         db = load_db()
         if user_id not in db:
                 "samples_collected": 0
             }
         sample_data = {
             "clean": clean_emb.tolist(),
             "noisy": [e.tolist() for e in noisy_embeddings]
         samples_collected = db[user_id]["samples_collected"]
         if samples_collected >= total_samples:
             all_embeddings = []
             for sample in db[user_id]["sample_embeddings"]:
                 all_embeddings.append(np.array(sample["clean"]))
             db[user_id]["voiceprint"] = avg_embedding.tolist()
             db[user_id]["status"] = "enrolled"
             db[user_id]["completed_at"] = datetime.now().isoformat()
             db[user_id]["sample_embeddings"] = []
             save_db(db)
         return f"Enrollment error: {str(e)}"
+# VERIFY
 def verify_speaker(audio_input, user_id):
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if audio_input is None:
     user_id = user_id.strip().upper()
     allowed, sec_msg = check_security(user_id)
     if not allowed:
         return f"ACCESS DENIED: {sec_msg}"
     db = load_db()
     if user_id not in db:
         return f"Error: User '{user_id}' not found. Please enroll first."
     try:
         wav = load_audio(audio_input)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(user_id, False)
             return f"ACCESS DENIED: {live_msg}"
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(user_id, False)
             return f"ACCESS DENIED: {spoof_msg}"
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[user_id]["voiceprint"])
         return f"Verification error: {str(e)}"
+# USER MANAGEMENT
 def list_users():
     db = load_db()
     if not db:
         return "No users enrolled yet."
     lines = ["=== Enrolled Users ===\n"]
     for uid, data in db.items():
         name = data.get("full_name", "Unknown")
         lines.append(f"ID: {uid} | Name: {name} | Status: {status} | Samples: {samples} | Enrolled: {enrolled}")
     return "\n".join(lines)
 def delete_user(user_id):
     """Remove an enrolled user and any attempt-tracking state for them.

     The ID is stripped and upper-cased before lookup. Returns a
     human-readable status string: an ``Error: ...`` message when the ID is
     blank or unknown, otherwise a success message naming the deleted user.
     """
     key = user_id.strip().upper() if user_id else ""
     if not key:
         return "Error: User ID is required."
     records = load_db()
     if key not in records:
         return f"Error: User '{key}' not found."
     removed = records.pop(key)
     # Fall back to the ID when no full name was stored.
     display_name = removed.get("full_name", key)
     save_db(records)
     # Drop lockout/cooldown state as well; no-op if none exists.
     attempt_tracker.pop(key, None)
     return f"User '{display_name}' ({key}) deleted successfully."
 def reset_lockout(user_id):
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     user_id = user_id.strip().upper()
     if user_id in attempt_tracker:
         attempt_tracker[user_id] = {"count": 0, "last_attempt": None, "locked_until": None}
 # GRADIO INTERFACE
+with gr.Blocks(title="ATM Voice Authentication System", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # ATM Voice Authentication System
+    ### Voice-Based Speaker Verification for Banking Security
+    **Model:** UniSpeech-SAT + AAM-Softmax | **EER:** 3.94% | **Speakers Trained:** 227 Akan speakers
+    """)
     with gr.Tabs():
         with gr.Tab("Enroll"):
+            gr.Markdown("""
+            ### Enroll New User
+            Record **6 voice samples** to create your voiceprint. Speak naturally for 3-4 seconds each time.
+            The system adds noise augmentation automatically (6 clean + 24 noisy = 30 embeddings averaged).
+            """)
             with gr.Row():
                 with gr.Column():
+                    enroll_audio = gr.Audio(label="Record Voice Sample", sources=["microphone", "upload"], type="numpy")
                     enroll_user_id = gr.Textbox(label="User ID (e.g., ATM_001)", placeholder="ATM_001")
                     enroll_name = gr.Textbox(label="Full Name", placeholder="Jochebed Fafa")
                     enroll_sample_num = gr.Number(label="Sample Number (1-6)", value=1, minimum=1, maximum=6, step=1)
                     enroll_btn = gr.Button("Enroll Sample", variant="primary")
                 with gr.Column():
                     enroll_result = gr.Textbox(label="Result", lines=4, interactive=False)
+            enroll_btn.click(fn=enroll_sample, inputs=[enroll_audio, enroll_user_id, enroll_name, enroll_sample_num], outputs=enroll_result)
         with gr.Tab("Verify"):
+            gr.Markdown("""
+            ### Verify Identity
+            Record your voice to verify against your enrolled voiceprint.
+            Security: 3 failed attempts = 5-minute lockout. 3-second cooldown between attempts.
+            """)
             with gr.Row():
                 with gr.Column():
+                    verify_audio = gr.Audio(label="Record Voice", sources=["microphone", "upload"], type="numpy")
                     verify_user_id = gr.Textbox(label="User ID", placeholder="ATM_001")
                     verify_btn = gr.Button("Verify", variant="primary")
                 with gr.Column():
                     verify_result = gr.Textbox(label="Result", lines=6, interactive=False)
+            verify_btn.click(fn=verify_speaker, inputs=[verify_audio, verify_user_id], outputs=verify_result)
         with gr.Tab("Users"):
             gr.Markdown("### Manage Enrolled Users")
             list_btn = gr.Button("List All Users")
             users_output = gr.Textbox(label="Enrolled Users", lines=10, interactive=False)
             list_btn.click(fn=list_users, outputs=users_output)
             gr.Markdown("---")
             with gr.Row():
                 with gr.Column():
                     del_btn = gr.Button("Delete User", variant="stop")
                     del_result = gr.Textbox(label="Result", interactive=False)
                     del_btn.click(fn=delete_user, inputs=del_user_id, outputs=del_result)
                 with gr.Column():
                     reset_user_id = gr.Textbox(label="User ID to Reset Lockout", placeholder="ATM_001")
                     reset_btn = gr.Button("Reset Lockout", variant="secondary")
                     reset_result = gr.Textbox(label="Result", interactive=False)
                     reset_btn.click(fn=reset_lockout, inputs=reset_user_id, outputs=reset_result)
         with gr.Tab("API Docs"):
+            gr.Markdown("""
+            ### REST API Endpoints for Banking Systems
+            **Base URL:** `https://amfafa-voice-authentication-sys.hf.space`
+            ---
+            #### 1. Enroll a Voice Sample
+            ```
+            POST /api/enroll
+            Content-Type: multipart/form-data
+            Fields: audio (WAV file), user_id (string), full_name (string)
+            ```
+            #### 2. Verify a Speaker
+            ```
+            POST /api/verify
+            Content-Type: multipart/form-data
+            Fields: audio (WAV file), user_id (string)
+            ```
+            #### 3. List Enrolled Users
+            ```
+            GET /api/users
+            ```
+            #### 4. Delete a User
+            ```
+            DELETE /api/users/{user_id}
+            ```
+            #### 5. Health Check
+            ```
+            GET /api/health
+            ```
+            #### 6. Reset Lockout
+            ```
+            POST /api/reset-lockout
+            Field: user_id (string)
+            ```
+            """)
+# REST API ENDPOINTS
+from fastapi import UploadFile, File, Form
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+fastapi_app = demo.app
+fastapi_app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_credentials=True,
     allow_headers=["*"],
 )
+@fastapi_app.get("/api/health")
 async def health_check():
     """Report service liveness with the current server time (ISO-8601)."""
     stamp = datetime.now().isoformat()
     return dict(status="healthy", timestamp=stamp)
+@fastapi_app.post("/api/enroll")
+async def api_enroll(audio: UploadFile = File(...), user_id: str = Form(...), full_name: str = Form(...)):
     try:
         audio_bytes = await audio.read()
         with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
             tmp.write(audio_bytes)
             tmp_path = tmp.name
         result = enroll_sample(tmp_path, user_id, full_name, 1)
         os.unlink(tmp_path)
         db = load_db()
         uid = user_id.strip().upper()
         samples_collected = db.get(uid, {}).get("samples_collected", 0)
         is_complete = db.get(uid, {}).get("status") == "enrolled"
         return JSONResponse(content={
             "success": "error" not in result.lower() and "failed" not in result.lower(),
             "message": result,
             "samples_required": NUM_CLEAN_SAMPLES,
             "enrollment_complete": is_complete
         })
     except Exception as e:
+        return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
+@fastapi_app.post("/api/verify")
+async def api_verify(audio: UploadFile = File(...), user_id: str = Form(...)):
     try:
         audio_bytes = await audio.read()
         with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
             tmp.write(audio_bytes)
             tmp_path = tmp.name
         uid = user_id.strip().upper()
         allowed, sec_msg = check_security(uid)
         if not allowed:
             os.unlink(tmp_path)
+            return JSONResponse(content={"success": True, "access_granted": False, "user_id": uid, "message": sec_msg, "locked": True})
         db = load_db()
         if uid not in db:
             os.unlink(tmp_path)
+            return JSONResponse(content={"success": False, "message": f"User '{uid}' not found. Please enroll first."})
         if db[uid].get("status") != "enrolled":
             os.unlink(tmp_path)
             samples = db[uid].get("samples_collected", 0)
+            return JSONResponse(content={"success": False, "message": f"Enrollment incomplete. {NUM_CLEAN_SAMPLES - samples} more sample(s) needed."})
         wav = load_audio(tmp_path)
         os.unlink(tmp_path)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(uid, False)
+            return JSONResponse(content={"success": True, "access_granted": False, "user_id": uid, "message": live_msg, "liveness_passed": False, "antispoofing_passed": None})
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(uid, False)
+            return JSONResponse(content={"success": True, "access_granted": False, "user_id": uid, "message": spoof_msg, "liveness_passed": True, "antispoofing_passed": False})
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[uid]["voiceprint"])
+        similarity = float(np.dot(test_emb, stored_emb) / (np.linalg.norm(test_emb) * np.linalg.norm(stored_emb) + 1e-10))
         granted = similarity >= THRESHOLD
         record_attempt(uid, granted)
                 response["message"] = f"Account locked for {LOCKOUT_MINUTES} minutes."
         return JSONResponse(content=response)
     except Exception as e:
+        return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
+@fastapi_app.get("/api/users")
 async def api_list_users():
     db = load_db()
     users = []
         })
     return JSONResponse(content={"success": True, "users": users, "total": len(users)})
+@fastapi_app.delete("/api/users/{user_id}")
 async def api_delete_user(user_id: str):
     """REST wrapper around ``delete_user``.

     ``success`` is False when the returned message contains "error"
     (case-insensitive); the message itself is passed through unchanged.
     """
     message = delete_user(user_id)
     payload = {
         "success": message.lower().find("error") == -1,
         "message": message,
     }
     return JSONResponse(content=payload)
+@fastapi_app.post("/api/reset-lockout")
 async def api_reset_lockout(user_id: str = Form(...)):
     """REST wrapper around ``reset_lockout``; always reports success and
     forwards whatever ``reset_lockout`` returned as the message."""
     body = {"success": True, "message": reset_lockout(user_id)}
     return JSONResponse(content=body)
 # LAUNCH
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)