Spaces:

amfafa
/

voice_authentication-sys

Sleeping

App Files Files Community

amfafa committed 5 days ago

Commit

a8a3da6

verified ·

1 Parent(s): 6a1ecee

Update app.py

Browse files

Files changed (1) hide show

app.py +344 -157

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import json
 import math
 import time
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -17,9 +18,7 @@ if not hasattr(torchaudio, 'list_audio_backends'):
 from transformers import AutoModel
-# CONFIGURATION
 CKPT_PATH = 'aam_best.pt'
 DB_PATH = 'voiceprint_db.json'
 MODEL_NAME = 'microsoft/unispeech-sat-base-sv'
@@ -35,10 +34,27 @@ LOCKOUT_MINUTES = 5
 COOLDOWN_SECONDS = 3
 ANTISPOOFING_THRESHOLD = 0.02
-# AAM-SOFTMAX MODEL
 class AAMSoftmax(nn.Module):
     def __init__(self, in_features, num_classes, margin=0.2, scale=30.0):
         super().__init__()
@@ -80,9 +96,7 @@ class SpeakerClassifier(nn.Module):
         return self.relu(self.fc1(x))
-# LOAD MODELS
 print("Loading UniSpeech-SAT base model...")
 base_model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
 base_model.eval()
@@ -91,13 +105,10 @@ for param in base_model.parameters():
 print("Loading AAM-Softmax checkpoint...")
 ckpt = torch.load(CKPT_PATH, map_location=DEVICE)
-# Auto-detect checkpoint format
 print(f"Checkpoint type: {type(ckpt)}")
 if isinstance(ckpt, dict):
     print(f"Checkpoint keys: {list(ckpt.keys())}")
-# Detect num_classes from checkpoint
 num_classes = 227
 if isinstance(ckpt, dict):
     if 'num_classes' in ckpt:
@@ -105,13 +116,10 @@ if isinstance(ckpt, dict):
     elif 'num_speakers' in ckpt:
         num_classes = ckpt['num_speakers']
-# Build classifier
 classifier = SpeakerClassifier(input_dim=768, hidden_dim=512, num_classes=num_classes).to(DEVICE)
-# Load weights - try every possible key format
 loaded = False
 if isinstance(ckpt, dict):
-    # Try common key names for classifier state
     for key in ['classifier_state', 'classifier_state_dict', 'model_state_dict', 'state_dict', 'model']:
         if key in ckpt:
             try:
@@ -120,53 +128,42 @@ if isinstance(ckpt, dict):
                 loaded = True
                 break
             except Exception as e:
-                print(f"Key '{key}' found but failed to load: {e}")
-    # If no named key worked, try loading the dict directly (maybe ckpt IS the state_dict)
     if not loaded:
-        # Check if the keys look like model parameters (contain dots like 'fc1.weight')
         sample_keys = list(ckpt.keys())[:5]
-        looks_like_state_dict = any('.' in k for k in sample_keys)
-        if looks_like_state_dict:
             try:
                 classifier.load_state_dict(ckpt)
-                print("Loaded classifier directly from checkpoint dict (it IS the state_dict)")
                 loaded = True
-            except Exception as e:
-                print(f"Direct load failed: {e}")
-                # Try with strict=False
                 try:
                     classifier.load_state_dict(ckpt, strict=False)
                     print("Loaded classifier with strict=False")
                     loaded = True
                 except Exception as e2:
-                    print(f"Strict=False also failed: {e2}")
-    # Try loading base_model state too if present
     if 'base_model_state' in ckpt:
         try:
             base_model.load_state_dict(ckpt['base_model_state'], strict=False)
-            print("Also loaded fine-tuned base model weights")
-        except Exception as e:
-            print(f"Base model load skipped: {e}")
 elif isinstance(ckpt, nn.Module):
-    # Checkpoint is the model itself
     classifier = ckpt.to(DEVICE)
-    print("Loaded classifier directly (checkpoint is model object)")
     loaded = True
 if not loaded:
-    print("WARNING: Could not load classifier weights. Using random initialization.")
-    print("The system will still run but verification accuracy will be poor.")
 classifier.eval()
 print(f"Models ready. num_classes={num_classes}, loaded={loaded}")
-# DATABASE
 def load_db():
     if os.path.exists(DB_PATH):
         with open(DB_PATH, 'r') as f:
@@ -178,9 +175,7 @@ def save_db(db):
         json.dump(db, f, indent=2, default=str)
-# AUDIO PROCESSING
 def load_audio(audio_input):
     if isinstance(audio_input, tuple):
         sr, audio_np = audio_input
@@ -235,54 +230,49 @@ def add_noise(wav_tensor, noise_level=0.005):
     return wav_tensor + noise
-# LIVENESS DETECTION
 def check_liveness(wav_tensor):
     wav_np = wav_tensor.numpy()
     rms = np.sqrt(np.mean(wav_np ** 2))
     if rms < 0.001:
-        return False, "Audio too quiet — possible silence or empty recording"
     std = np.std(wav_np)
     if std < 0.001:
-        return False, "Audio lacks variation — possible synthetic tone"
     zero_crossings = np.sum(np.abs(np.diff(np.sign(wav_np)))) / (2 * len(wav_np))
     if zero_crossings < 0.01:
-        return False, "Abnormal audio pattern — possible replay attack"
     non_silent = np.abs(wav_np) > 0.01
     speech_ratio = np.sum(non_silent) / len(wav_np)
     if speech_ratio < 0.1:
-        return False, "Insufficient speech content detected"
     return True, "Liveness check passed"
-# ANTISPOOFING
 def check_antispoofing(wav_tensor):
     wav_np = wav_tensor.numpy()
     fft = np.fft.rfft(wav_np)
     magnitude = np.abs(fft)
     magnitude = magnitude[magnitude > 0]
     if len(magnitude) == 0:
-        return False, "No frequency content detected"
     geometric_mean = np.exp(np.mean(np.log(magnitude + 1e-10)))
     arithmetic_mean = np.mean(magnitude)
     spectral_flatness = geometric_mean / (arithmetic_mean + 1e-10)
     if spectral_flatness > (1.0 - ANTISPOOFING_THRESHOLD):
-        return False, f"Spectral flatness too high ({spectral_flatness:.4f}) — possible synthetic audio"
     frame_size = 1600
     if len(wav_np) >= frame_size * 3:
         frames = [wav_np[i:i + frame_size] for i in range(0, len(wav_np) - frame_size, frame_size)]
         frame_energies = [np.sqrt(np.mean(f ** 2)) for f in frames]
         energy_std = np.std(frame_energies)
         if energy_std < 0.001:
-            return False, "Unnaturally uniform energy — possible synthetic audio"
     return True, "Antispoofing check passed"
-# SECURITY: LOCKOUT & COOLDOWN
 attempt_tracker = {}
 def check_security(user_id):
@@ -302,7 +292,7 @@ def check_security(user_id):
         last = datetime.fromisoformat(tracker["last_attempt"])
         elapsed = (now - last).total_seconds()
         if elapsed < COOLDOWN_SECONDS:
-            return False, f"Please wait {COOLDOWN_SECONDS - int(elapsed)} seconds before trying again."
     return True, "OK"
 def record_attempt(user_id, success):
@@ -320,33 +310,61 @@ def record_attempt(user_id, success):
             tracker["locked_until"] = (now + timedelta(minutes=LOCKOUT_MINUTES)).isoformat()
-# ENROLL
 def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=NUM_CLEAN_SAMPLES):
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if not full_name or not full_name.strip():
         return "Error: Full Name is required."
     if audio_input is None:
-        return "Error: No audio recorded. Please record your voice."
     user_id = user_id.strip().upper()
     full_name = full_name.strip()
     try:
         wav = load_audio(audio_input)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             return f"Enrollment failed: {live_msg}"
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             return f"Enrollment failed: {spoof_msg}"
         clean_emb = extract_embedding(wav)
         noisy_embeddings = []
         for i in range(NUM_NOISY_COPIES):
             noise_level = 0.003 + (i * 0.002)
@@ -355,7 +373,6 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
             noisy_embeddings.append(noisy_emb)
         db = load_db()
         if user_id not in db:
             db[user_id] = {
                 "full_name": full_name,
@@ -373,7 +390,6 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
         db[user_id]["sample_embeddings"].append(sample_data)
         db[user_id]["samples_collected"] = len(db[user_id]["sample_embeddings"])
         db[user_id]["full_name"] = full_name
         samples_collected = db[user_id]["samples_collected"]
         if samples_collected >= total_samples:
@@ -382,44 +398,37 @@ def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=
                 all_embeddings.append(np.array(sample["clean"]))
                 for noisy in sample["noisy"]:
                     all_embeddings.append(np.array(noisy))
             avg_embedding = np.mean(all_embeddings, axis=0)
             avg_embedding = avg_embedding / (np.linalg.norm(avg_embedding) + 1e-10)
             db[user_id]["voiceprint"] = avg_embedding.tolist()
             db[user_id]["status"] = "enrolled"
             db[user_id]["completed_at"] = datetime.now().isoformat()
             db[user_id]["sample_embeddings"] = []
             save_db(db)
             return f"Enrollment COMPLETE for {full_name} ({user_id}). Voiceprint created from {total_samples} samples ({total_samples * (1 + NUM_NOISY_COPIES)} embeddings averaged)."
         else:
             save_db(db)
             remaining = total_samples - samples_collected
             return f"Sample {samples_collected}/{total_samples} recorded for {full_name}. {remaining} more sample(s) needed."
     except Exception as e:
         return f"Enrollment error: {str(e)}"
-# VERIFY
 def verify_speaker(audio_input, user_id):
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if audio_input is None:
-        return "Error: No audio recorded. Please speak into the microphone."
     user_id = user_id.strip().upper()
     allowed, sec_msg = check_security(user_id)
     if not allowed:
         return f"ACCESS DENIED: {sec_msg}"
     db = load_db()
     if user_id not in db:
-        return f"Error: User '{user_id}' not found. Please enroll first."
     if db[user_id].get("status") != "enrolled":
         samples = db[user_id].get("samples_collected", 0)
         remaining = NUM_CLEAN_SAMPLES - samples
@@ -427,12 +436,10 @@ def verify_speaker(audio_input, user_id):
     try:
         wav = load_audio(audio_input)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(user_id, False)
             return f"ACCESS DENIED: {live_msg}"
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(user_id, False)
@@ -440,42 +447,29 @@ def verify_speaker(audio_input, user_id):
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[user_id]["voiceprint"])
-        similarity = float(np.dot(test_emb, stored_emb) / (
-            np.linalg.norm(test_emb) * np.linalg.norm(stored_emb) + 1e-10
-        ))
         if similarity >= THRESHOLD:
             record_attempt(user_id, True)
             full_name = db[user_id].get("full_name", user_id)
-            return (
-                f"ACCESS GRANTED\n"
-                f"Welcome, {full_name}\n"
-                f"Confidence: {similarity:.4f} (threshold: {THRESHOLD})\n"
-                f"Liveness: Passed | Antispoofing: Passed"
-            )
         else:
             record_attempt(user_id, False)
             tracker = attempt_tracker.get(user_id, {})
             attempts_left = MAX_ATTEMPTS - tracker.get("count", 0)
-            msg = (
-                f"ACCESS DENIED\n"
-                f"Voice does not match registered voiceprint.\n"
-                f"Similarity: {similarity:.4f} (threshold: {THRESHOLD})\n"
-            )
             if attempts_left > 0:
                 msg += f"Attempts remaining: {attempts_left}"
             else:
                 msg += f"Account locked for {LOCKOUT_MINUTES} minutes."
             return msg
     except Exception as e:
         return f"Verification error: {str(e)}"
-# USER MANAGEMENT
 def list_users():
     db = load_db()
     if not db:
@@ -501,7 +495,7 @@ def delete_user(user_id):
     save_db(db)
     if user_id in attempt_tracker:
         del attempt_tracker[user_id]
-    return f"User '{name}' ({user_id}) deleted successfully."
 def reset_lockout(user_id):
     if not user_id or not user_id.strip():
@@ -510,16 +504,16 @@ def reset_lockout(user_id):
     if user_id in attempt_tracker:
         attempt_tracker[user_id] = {"count": 0, "last_attempt": None, "locked_until": None}
         return f"Lockout reset for {user_id}."
-    return f"No lockout record found for {user_id}."
-# GRADIO INTERFACE
 with gr.Blocks(title="ATM Voice Authentication System", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    #  Voice Authentication System
     """)
     with gr.Tabs():
@@ -528,7 +522,6 @@ with gr.Blocks(title="ATM Voice Authentication System", theme=gr.themes.Soft())
             gr.Markdown("""
             ### Enroll New User
             Record **6 voice samples** to create your voiceprint. Speak naturally for 3-4 seconds each time.
-            The system adds noise augmentation automatically (6 clean + 24 noisy = 30 embeddings averaged).
             """)
             with gr.Row():
                 with gr.Column():
@@ -545,7 +538,6 @@ with gr.Blocks(title="ATM Voice Authentication System", theme=gr.themes.Soft())
             gr.Markdown("""
             ### Verify Identity
             Record your voice to verify against your enrolled voiceprint.
-            Security: 3 failed attempts = 5-minute lockout. 3-second cooldown between attempts.
             """)
             with gr.Row():
                 with gr.Column():
@@ -576,51 +568,42 @@ with gr.Blocks(title="ATM Voice Authentication System", theme=gr.themes.Soft())
         with gr.Tab("API Docs"):
             gr.Markdown("""
-            ### REST API Endpoints for Banking Systems
             **Base URL:** `https://amfafa-voice-authentication-sys.hf.space`
             ---
-            #### 1. Enroll a Voice Sample
-            ```
-            POST /api/enroll
-            Content-Type: multipart/form-data
-            Fields: audio (WAV file), user_id (string), full_name (string)
-            ```
-            #### 2. Verify a Speaker
-            ```
-            POST /api/verify
-            Content-Type: multipart/form-data
-            Fields: audio (WAV file), user_id (string)
-            ```
-            #### 3. List Enrolled Users
-            ```
-            GET /api/users
-            ```
-            #### 4. Delete a User
-            ```
-            DELETE /api/users/{user_id}
-            ```
-            #### 5. Health Check
-            ```
-            GET /api/health
-            ```
-            #### 6. Reset Lockout
-            ```
-            POST /api/reset-lockout
-            Field: user_id (string)
-            ```
-            """)
-# REST API ENDPOINTS
 from fastapi import UploadFile, File, Form
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
@@ -635,17 +618,18 @@ fastapi_app.add_middleware(
     allow_headers=["*"],
 )
 @fastapi_app.get("/api/health")
 async def health_check():
     return {
         "status": "healthy",
         "model": "UniSpeech-SAT + AAM-Softmax",
-        "eer": "3.94%",
         "threshold": THRESHOLD,
         "device": str(DEVICE),
         "timestamp": datetime.now().isoformat()
     }
 @fastapi_app.post("/api/enroll")
 async def api_enroll(audio: UploadFile = File(...), user_id: str = Form(...), full_name: str = Form(...)):
     try:
@@ -670,6 +654,7 @@ async def api_enroll(audio: UploadFile = File(...), user_id: str = Form(...), fu
     except Exception as e:
         return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
 @fastapi_app.post("/api/verify")
 async def api_verify(audio: UploadFile = File(...), user_id: str = Form(...)):
     try:
@@ -687,12 +672,10 @@ async def api_verify(audio: UploadFile = File(...), user_id: str = Form(...)):
         db = load_db()
         if uid not in db:
             os.unlink(tmp_path)
-            return JSONResponse(content={"success": False, "message": f"User '{uid}' not found. Please enroll first."})
         if db[uid].get("status") != "enrolled":
             os.unlink(tmp_path)
-            samples = db[uid].get("samples_collected", 0)
-            return JSONResponse(content={"success": False, "message": f"Enrollment incomplete. {NUM_CLEAN_SAMPLES - samples} more sample(s) needed."})
         wav = load_audio(tmp_path)
         os.unlink(tmp_path)
@@ -700,12 +683,12 @@ async def api_verify(audio: UploadFile = File(...), user_id: str = Form(...)):
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(uid, False)
-            return JSONResponse(content={"success": True, "access_granted": False, "user_id": uid, "message": live_msg, "liveness_passed": False, "antispoofing_passed": None})
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(uid, False)
-            return JSONResponse(content={"success": True, "access_granted": False, "user_id": uid, "message": spoof_msg, "liveness_passed": True, "antispoofing_passed": False})
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[uid]["voiceprint"])
@@ -713,10 +696,8 @@ async def api_verify(audio: UploadFile = File(...), user_id: str = Form(...)):
         granted = similarity >= THRESHOLD
         record_attempt(uid, granted)
         tracker = attempt_tracker.get(uid, {})
-        attempts_used = tracker.get("count", 0)
-        attempts_remaining = max(0, MAX_ATTEMPTS - attempts_used)
         response = {
             "success": True,
@@ -730,19 +711,17 @@ async def api_verify(audio: UploadFile = File(...), user_id: str = Form(...)):
             "attempts_remaining": attempts_remaining if not granted else MAX_ATTEMPTS,
             "locked": attempts_remaining == 0 and not granted
         }
         if granted:
             response["message"] = "Access granted. Voice verified."
         else:
-            if attempts_remaining > 0:
-                response["message"] = f"Voice does not match. {attempts_remaining} attempt(s) remaining."
-            else:
-                response["message"] = f"Account locked for {LOCKOUT_MINUTES} minutes."
         return JSONResponse(content=response)
     except Exception as e:
         return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
 @fastapi_app.get("/api/users")
 async def api_list_users():
     db = load_db()
@@ -758,20 +737,228 @@ async def api_list_users():
         })
     return JSONResponse(content={"success": True, "users": users, "total": len(users)})
 @fastapi_app.delete("/api/users/{user_id}")
 async def api_delete_user(user_id: str):
     result = delete_user(user_id)
     success = "error" not in result.lower()
     return JSONResponse(content={"success": success, "message": result})
 @fastapi_app.post("/api/reset-lockout")
 async def api_reset_lockout(user_id: str = Form(...)):
     result = reset_lockout(user_id)
     return JSONResponse(content={"success": True, "message": result})
-# LAUNCH
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 import json
 import math
 import time
+import uuid
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from transformers import AutoModel
+# Config
 CKPT_PATH = 'aam_best.pt'
 DB_PATH = 'voiceprint_db.json'
 MODEL_NAME = 'microsoft/unispeech-sat-base-sv'
 COOLDOWN_SECONDS = 3
 ANTISPOOFING_THRESHOLD = 0.02
+# Challenge word pool (simple, short, easy to pronounce)
+CHALLENGE_WORDS = [
+    'Red', 'Blue', 'Gold', 'Star', 'Water',
+    'Moon', 'Fire', 'Green', 'Black', 'White',
+    'Sun', 'Rain', 'Tree', 'Fish', 'Bird',
+    'Stone', 'Wind', 'Cloud', 'Light', 'Sound'
+]
+# Session steps
+SESSION_STEPS = {
+    'STARTED': 'started',
+    'VERIFIED': 'verified',
+    'LIVENESS_PENDING': 'liveness_pending',
+    'AUTHENTICATED': 'authenticated',
+    'TRANSACTION_PENDING': 'transaction_pending',
+    'COMPLETE': 'complete',
+    'DENIED': 'denied'
+}
+# AAM-Softmax model
 class AAMSoftmax(nn.Module):
     def __init__(self, in_features, num_classes, margin=0.2, scale=30.0):
         super().__init__()
         return self.relu(self.fc1(x))
+# Load models
 print("Loading UniSpeech-SAT base model...")
 base_model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
 base_model.eval()
 print("Loading AAM-Softmax checkpoint...")
 ckpt = torch.load(CKPT_PATH, map_location=DEVICE)
 print(f"Checkpoint type: {type(ckpt)}")
 if isinstance(ckpt, dict):
     print(f"Checkpoint keys: {list(ckpt.keys())}")
 num_classes = 227
 if isinstance(ckpt, dict):
     if 'num_classes' in ckpt:
     elif 'num_speakers' in ckpt:
         num_classes = ckpt['num_speakers']
 classifier = SpeakerClassifier(input_dim=768, hidden_dim=512, num_classes=num_classes).to(DEVICE)
 loaded = False
 if isinstance(ckpt, dict):
     for key in ['classifier_state', 'classifier_state_dict', 'model_state_dict', 'state_dict', 'model']:
         if key in ckpt:
             try:
                 loaded = True
                 break
             except Exception as e:
+                print(f"Key '{key}' found but failed: {e}")
     if not loaded:
         sample_keys = list(ckpt.keys())[:5]
+        if any('.' in k for k in sample_keys):
             try:
                 classifier.load_state_dict(ckpt)
+                print("Loaded classifier directly from checkpoint dict")
                 loaded = True
+            except:
                 try:
                     classifier.load_state_dict(ckpt, strict=False)
                     print("Loaded classifier with strict=False")
                     loaded = True
                 except Exception as e2:
+                    print(f"Direct load failed: {e2}")
     if 'base_model_state' in ckpt:
         try:
             base_model.load_state_dict(ckpt['base_model_state'], strict=False)
+            print("Loaded fine-tuned base model weights")
+        except:
+            pass
 elif isinstance(ckpt, nn.Module):
     classifier = ckpt.to(DEVICE)
+    print("Loaded classifier directly (model object)")
     loaded = True
 if not loaded:
+    print("WARNING: Could not load classifier weights. Using random init.")
 classifier.eval()
 print(f"Models ready. num_classes={num_classes}, loaded={loaded}")
+# Database
 def load_db():
     if os.path.exists(DB_PATH):
         with open(DB_PATH, 'r') as f:
         json.dump(db, f, indent=2, default=str)
+# Audio processing
 def load_audio(audio_input):
     if isinstance(audio_input, tuple):
         sr, audio_np = audio_input
     return wav_tensor + noise
+# Liveness detection
 def check_liveness(wav_tensor):
     """Run signal-level sanity checks on a recorded waveform.

     Args:
         wav_tensor: object exposing ``.numpy()`` that yields the audio
             samples (assumed to be a 1-D float waveform -- TODO confirm
             against ``load_audio``).

     Returns:
         tuple[bool, str]: ``(passed, message)``. ``passed`` is False when
         the clip is empty, contains non-finite samples, has RMS or standard
         deviation below 0.001, a zero-crossing rate below 0.01, or fewer
         than 10% of samples with absolute amplitude above 0.01.
     """
     wav_np = wav_tensor.numpy()
     n_samples = len(wav_np)

     # An empty clip makes every statistic below 0/0 = NaN; NaN compares
     # False against every threshold, so without this guard the clip would
     # fall through to "Liveness check passed".
     if n_samples == 0:
         return False, "Audio too quiet"

     # A single NaN/Inf sample poisons rms/std/zero-crossing in the same
     # way, letting a corrupt clip pass; reject it explicitly.
     if not np.all(np.isfinite(wav_np)):
         return False, "Abnormal audio pattern"

     rms = np.sqrt(np.mean(wav_np ** 2))
     if rms < 0.001:
         return False, "Audio too quiet"

     std = np.std(wav_np)
     if std < 0.001:
         return False, "Audio lacks variation"

     # Each sign flip contributes 2 to |diff(sign)|, hence the factor 2 in
     # the denominator: the result is crossings per sample.
     zero_crossings = np.sum(np.abs(np.diff(np.sign(wav_np)))) / (2 * n_samples)
     if zero_crossings < 0.01:
         return False, "Abnormal audio pattern"

     non_silent = np.abs(wav_np) > 0.01
     speech_ratio = np.sum(non_silent) / n_samples
     if speech_ratio < 0.1:
         return False, "Insufficient speech content"

     return True, "Liveness check passed"
+# Antispoofing
 def check_antispoofing(wav_tensor):
     """Heuristic check based on spectral flatness and frame energy.

     Args:
         wav_tensor: object exposing ``.numpy()`` that yields the audio
             samples (assumed 1-D -- TODO confirm against ``load_audio``).

     Returns:
         tuple[bool, str]: ``(passed, message)``. Fails when the clip has no
         non-zero spectral content, when spectral flatness exceeds
         ``1.0 - ANTISPOOFING_THRESHOLD``, or when the standard deviation of
         per-frame RMS energy (1600-sample frames, clips of at least three
         frames only) is below 0.001.
     """
     wav_np = wav_tensor.numpy()

     # np.fft.rfft raises ValueError on zero-length input; report an empty
     # clip as a failed check instead of surfacing an exception.
     if len(wav_np) == 0:
         return False, "No frequency content"

     magnitude = np.abs(np.fft.rfft(wav_np))
     magnitude = magnitude[magnitude > 0]
     if len(magnitude) == 0:
         return False, "No frequency content"

     # Spectral flatness = geometric mean / arithmetic mean of the spectrum.
     geometric_mean = np.exp(np.mean(np.log(magnitude + 1e-10)))
     arithmetic_mean = np.mean(magnitude)
     spectral_flatness = geometric_mean / (arithmetic_mean + 1e-10)
     if spectral_flatness > (1.0 - ANTISPOOFING_THRESHOLD):
         return False, "Possible synthetic audio"

     frame_size = 1600  # presumably 100 ms at 16 kHz -- TODO confirm sample rate
     if len(wav_np) >= frame_size * 3:
         # "+ 1" keeps the final full frame when the clip length is an exact
         # multiple of frame_size (the previous bound silently dropped it).
         last_start = len(wav_np) - frame_size + 1
         frames = [wav_np[i:i + frame_size] for i in range(0, last_start, frame_size)]
         frame_energies = [np.sqrt(np.mean(f ** 2)) for f in frames]
         energy_std = np.std(frame_energies)
         if energy_std < 0.001:
             return False, "Unnaturally uniform energy"

     return True, "Antispoofing check passed"
+# Security: lockout and cooldown
 attempt_tracker = {}
 def check_security(user_id):
         last = datetime.fromisoformat(tracker["last_attempt"])
         elapsed = (now - last).total_seconds()
         if elapsed < COOLDOWN_SECONDS:
+            return False, f"Please wait {COOLDOWN_SECONDS - int(elapsed)} seconds."
     return True, "OK"
 def record_attempt(user_id, success):
             tracker["locked_until"] = (now + timedelta(minutes=LOCKOUT_MINUTES)).isoformat()
+# Generate random challenge (2 words from pool)
def generate_challenge(num_words=2):
    """Return a challenge phrase of distinct words drawn from CHALLENGE_WORDS.

    Args:
        num_words: how many distinct words to draw (default 2).

    Returns:
        str: the chosen words joined by single spaces.

    Raises:
        ValueError: if ``num_words`` is negative or larger than the pool.
    """
    # Local import so this function does not depend on a module-level import
    # that is not visible here. The challenge is a security control against
    # replayed recordings, so it is drawn from the OS CSPRNG rather than the
    # predictable default generator of the `random` module.
    import secrets

    words = secrets.SystemRandom().sample(CHALLENGE_WORDS, num_words)
    return ' '.join(words)
+# Session storage (in-memory)
+sessions = {}
def create_session(user_id, ttl_minutes=5):
    """Create an in-memory authentication session and register it in ``sessions``.

    Args:
        user_id: raw user identifier; stored stripped and upper-cased.
        ttl_minutes: session lifetime in minutes (default 5).

    Returns:
        dict: the session record that was stored under its ``session_id``.
    """
    # Read the clock once so expires_at is exactly created_at + ttl.
    now = datetime.now()
    session_id = str(uuid.uuid4())
    session = {
        "session_id": session_id,
        "user_id": user_id.strip().upper(),
        "step": SESSION_STEPS['STARTED'],
        "challenge_phrase": None,
        "full_name": None,
        "similarity": None,
        # Timestamps are kept as ISO-8601 strings.
        "created_at": now.isoformat(),
        "expires_at": (now + timedelta(minutes=ttl_minutes)).isoformat(),
    }
    sessions[session_id] = session
    return session
def get_session(session_id):
    """Look up a live session by id.

    Returns the stored session dict, or None when the id is unknown or the
    session's ``expires_at`` timestamp has passed. An expired session is
    removed from ``sessions`` as a side effect of the lookup.
    """
    try:
        record = sessions[session_id]
    except KeyError:
        return None

    current_time = datetime.now()
    deadline = datetime.fromisoformat(record["expires_at"])
    if current_time <= deadline:
        return record

    # Past its deadline: evict before reporting it as missing.
    sessions.pop(session_id)
    return None
+# Enroll
 def enroll_sample(audio_input, user_id, full_name, sample_number, total_samples=NUM_CLEAN_SAMPLES):
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if not full_name or not full_name.strip():
         return "Error: Full Name is required."
     if audio_input is None:
+        return "Error: No audio recorded."
     user_id = user_id.strip().upper()
     full_name = full_name.strip()
     try:
         wav = load_audio(audio_input)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             return f"Enrollment failed: {live_msg}"
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             return f"Enrollment failed: {spoof_msg}"
         clean_emb = extract_embedding(wav)
         noisy_embeddings = []
         for i in range(NUM_NOISY_COPIES):
             noise_level = 0.003 + (i * 0.002)
             noisy_embeddings.append(noisy_emb)
         db = load_db()
         if user_id not in db:
             db[user_id] = {
                 "full_name": full_name,
         db[user_id]["sample_embeddings"].append(sample_data)
         db[user_id]["samples_collected"] = len(db[user_id]["sample_embeddings"])
         db[user_id]["full_name"] = full_name
         samples_collected = db[user_id]["samples_collected"]
         if samples_collected >= total_samples:
                 all_embeddings.append(np.array(sample["clean"]))
                 for noisy in sample["noisy"]:
                     all_embeddings.append(np.array(noisy))
             avg_embedding = np.mean(all_embeddings, axis=0)
             avg_embedding = avg_embedding / (np.linalg.norm(avg_embedding) + 1e-10)
             db[user_id]["voiceprint"] = avg_embedding.tolist()
             db[user_id]["status"] = "enrolled"
             db[user_id]["completed_at"] = datetime.now().isoformat()
             db[user_id]["sample_embeddings"] = []
             save_db(db)
             return f"Enrollment COMPLETE for {full_name} ({user_id}). Voiceprint created from {total_samples} samples ({total_samples * (1 + NUM_NOISY_COPIES)} embeddings averaged)."
         else:
             save_db(db)
             remaining = total_samples - samples_collected
             return f"Sample {samples_collected}/{total_samples} recorded for {full_name}. {remaining} more sample(s) needed."
     except Exception as e:
         return f"Enrollment error: {str(e)}"
+# Verify
 def verify_speaker(audio_input, user_id):
     if not user_id or not user_id.strip():
         return "Error: User ID is required."
     if audio_input is None:
+        return "Error: No audio recorded."
     user_id = user_id.strip().upper()
     allowed, sec_msg = check_security(user_id)
     if not allowed:
         return f"ACCESS DENIED: {sec_msg}"
     db = load_db()
     if user_id not in db:
+        return f"Error: User '{user_id}' not found."
     if db[user_id].get("status") != "enrolled":
         samples = db[user_id].get("samples_collected", 0)
         remaining = NUM_CLEAN_SAMPLES - samples
     try:
         wav = load_audio(audio_input)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(user_id, False)
             return f"ACCESS DENIED: {live_msg}"
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(user_id, False)
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[user_id]["voiceprint"])
+        similarity = float(np.dot(test_emb, stored_emb) / (np.linalg.norm(test_emb) * np.linalg.norm(stored_emb) + 1e-10))
         if similarity >= THRESHOLD:
             record_attempt(user_id, True)
             full_name = db[user_id].get("full_name", user_id)
+            return (f"ACCESS GRANTED\nWelcome, {full_name}\n"
+                    f"Confidence: {similarity:.4f} (threshold: {THRESHOLD})\n"
+                    f"Liveness: Passed | Antispoofing: Passed")
         else:
             record_attempt(user_id, False)
             tracker = attempt_tracker.get(user_id, {})
             attempts_left = MAX_ATTEMPTS - tracker.get("count", 0)
+            msg = f"ACCESS DENIED\nVoice does not match.\nSimilarity: {similarity:.4f} (threshold: {THRESHOLD})\n"
             if attempts_left > 0:
                 msg += f"Attempts remaining: {attempts_left}"
             else:
                 msg += f"Account locked for {LOCKOUT_MINUTES} minutes."
             return msg
     except Exception as e:
         return f"Verification error: {str(e)}"
+# User management
 def list_users():
     db = load_db()
     if not db:
     save_db(db)
     if user_id in attempt_tracker:
         del attempt_tracker[user_id]
+    return f"User '{name}' ({user_id}) deleted."
 def reset_lockout(user_id):
     if not user_id or not user_id.strip():
     if user_id in attempt_tracker:
         attempt_tracker[user_id] = {"count": 0, "last_attempt": None, "locked_until": None}
         return f"Lockout reset for {user_id}."
+    return f"No lockout record for {user_id}."
+# Gradio interface
 with gr.Blocks(title="ATM Voice Authentication System", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # ATM Voice Authentication System
+    ### Voice-Based Speaker Verification for Banking Security
+    Voice biometric authentication system for secure ATM access
     """)
     with gr.Tabs():
             gr.Markdown("""
             ### Enroll New User
             Record **6 voice samples** to create your voiceprint. Speak naturally for 3-4 seconds each time.
             """)
             with gr.Row():
                 with gr.Column():
             gr.Markdown("""
             ### Verify Identity
             Record your voice to verify against your enrolled voiceprint.
             """)
             with gr.Row():
                 with gr.Column():
         with gr.Tab("API Docs"):
             gr.Markdown("""
+            ### REST API Endpoints
             **Base URL:** `https://amfafa-voice-authentication-sys.hf.space`
             ---
+            #### Basic Endpoints
+            - `POST /api/enroll` — Enroll a voice sample (audio, user_id, full_name)
+            - `POST /api/verify` — Verify a voice (audio, user_id)
+            - `GET /api/users` — List enrolled users
+            - `DELETE /api/users/{user_id}` — Delete a user
+            - `GET /api/health` — Health check
+            ---
+            #### Session-Based Voice Authentication Flow
+            These endpoints power the full conversational ATM experience.
+            **Step 1: Start session**
+            `POST /api/session/start` — Send `user_id` → Returns session_id
+            **Step 2: Verify identity**
+            `POST /api/session/verify` — Send audio + session_id → Returns greeting with user's name + challenge words
+            **Step 3: Liveness check**
+            `POST /api/session/liveness` — Send audio of challenge words + session_id → Returns authenticated or denied
+            **Step 4: Confirm transaction (simulated)**
+            `POST /api/session/transaction` — Send amount + session_id → Returns confirmation
+            **Check session**
+            `GET /api/session/{session_id}` — Returns current session state
+            """)
+# REST API endpoints
 from fastapi import UploadFile, File, Form
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
     allow_headers=["*"],
 )
+# Health check
 @fastapi_app.get("/api/health")
 async def health_check():
     """Report that the service is up, along with the active model settings.

     Returns:
         dict with a fixed "healthy" status, the model label, the similarity
         threshold, the torch device as a string, and the current local
         timestamp in ISO format.
     """
     payload = {"status": "healthy", "model": "UniSpeech-SAT + AAM-Softmax"}
     payload["threshold"] = THRESHOLD
     payload["device"] = str(DEVICE)
     payload["timestamp"] = datetime.now().isoformat()
     return payload
+# Basic enroll endpoint
 @fastapi_app.post("/api/enroll")
 async def api_enroll(audio: UploadFile = File(...), user_id: str = Form(...), full_name: str = Form(...)):
     try:
     except Exception as e:
         return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
+# Basic verify endpoint
 @fastapi_app.post("/api/verify")
 async def api_verify(audio: UploadFile = File(...), user_id: str = Form(...)):
     try:
         db = load_db()
         if uid not in db:
             os.unlink(tmp_path)
+            return JSONResponse(content={"success": False, "message": f"User '{uid}' not found."})
         if db[uid].get("status") != "enrolled":
             os.unlink(tmp_path)
+            return JSONResponse(content={"success": False, "message": "Enrollment incomplete."})
         wav = load_audio(tmp_path)
         os.unlink(tmp_path)
         is_live, live_msg = check_liveness(wav)
         if not is_live:
             record_attempt(uid, False)
+            return JSONResponse(content={"success": True, "access_granted": False, "user_id": uid, "message": live_msg, "liveness_passed": False})
         is_real, spoof_msg = check_antispoofing(wav)
         if not is_real:
             record_attempt(uid, False)
+            return JSONResponse(content={"success": True, "access_granted": False, "user_id": uid, "message": spoof_msg, "antispoofing_passed": False})
         test_emb = extract_embedding(wav)
         stored_emb = np.array(db[uid]["voiceprint"])
         granted = similarity >= THRESHOLD
         record_attempt(uid, granted)
         tracker = attempt_tracker.get(uid, {})
+        attempts_remaining = max(0, MAX_ATTEMPTS - tracker.get("count", 0))
         response = {
             "success": True,
             "attempts_remaining": attempts_remaining if not granted else MAX_ATTEMPTS,
             "locked": attempts_remaining == 0 and not granted
         }
         if granted:
             response["message"] = "Access granted. Voice verified."
+        elif attempts_remaining > 0:
+            response["message"] = f"Voice does not match. {attempts_remaining} attempt(s) remaining."
         else:
+            response["message"] = f"Account locked for {LOCKOUT_MINUTES} minutes."
         return JSONResponse(content=response)
     except Exception as e:
         return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
+# List users
 @fastapi_app.get("/api/users")
 async def api_list_users():
     db = load_db()
         })
     return JSONResponse(content={"success": True, "users": users, "total": len(users)})
+# Delete user
 @fastapi_app.delete("/api/users/{user_id}")
 async def api_delete_user(user_id: str):
     """Delete a user via ``delete_user`` and wrap its message in JSON.

     Args:
         user_id: Path parameter naming the user to remove.

     Returns:
         JSONResponse with ``success`` and the text returned by ``delete_user``.
     """
     outcome = delete_user(user_id)
     # Success is inferred from the message text alone.
     # NOTE(review): any message containing "error" (including a user name
     # echoed back in it) flips this flag; confirm delete_user's failure
     # messages always contain that word.
     failed = "error" in outcome.lower()
     return JSONResponse(content={"success": not failed, "message": outcome})
+# Reset lockout
 @fastapi_app.post("/api/reset-lockout")
 async def api_reset_lockout(user_id: str = Form(...)):
     """Clear the lockout state for a user and return the resulting message.

     Args:
         user_id: Form field identifying the user whose lockout is reset.

     Returns:
         JSONResponse whose ``success`` is always True; ``message`` carries
         whatever ``reset_lockout`` reported.
     """
     # NOTE(review): no caller authentication is visible on this route.
     return JSONResponse(content={"success": True, "message": reset_lockout(user_id)})
+# SESSION-BASED ENDPOINTS (conversational ATM flow)
+# Step 1: Start a session
@fastapi_app.post("/api/session/start")
async def session_start(user_id: str = Form(...)):
    """Step 1 of the session flow: open a session for an enrolled user.

    Args:
        user_id: Form field with the user's identifier; it is stripped and
            upper-cased before lookup.

    Returns:
        JSONResponse. ``success`` is False when the user is unknown, not fully
        enrolled, or refused by ``check_security`` (then ``locked`` is True).
        Otherwise the new ``session_id`` and next-step instructions.
    """
    normalized_id = user_id.strip().upper()
    users = load_db()

    def refuse(message, **extra):
        # Every refusal shares the same envelope; extras are appended after it.
        return JSONResponse(content={"success": False, "message": message, **extra})

    if normalized_id not in users:
        return refuse(f"User '{normalized_id}' not found. Please enroll first.")
    if users[normalized_id].get("status") != "enrolled":
        return refuse("Enrollment incomplete.")

    permitted, reason = check_security(normalized_id)
    if not permitted:
        return refuse(reason, locked=True)

    new_session = create_session(normalized_id)
    payload = {
        "success": True,
        "session_id": new_session["session_id"],
        "user_id": normalized_id,
        "message": "Session started. Please provide a voice sample to verify your identity.",
        "next_step": "verify",
        "instruction": "Record your voice and send it to /api/session/verify",
    }
    return JSONResponse(content=payload)
+# Step 2: Verify identity (returns greeting + challenge)
@fastapi_app.post("/api/session/verify")
async def session_verify(audio: UploadFile = File(...), session_id: str = Form(...)):
    """Step 2 of the session flow: match the caller's voice to the stored voiceprint.

    Args:
        audio: Uploaded voice recording; it is written to a temporary ``.wav``
            file so that ``load_audio`` can read it.
        session_id: Identifier of a session that is still in the STARTED step.

    Returns:
        JSONResponse. ``success`` is False when the session is missing, is in
        another step, or the user is refused by ``check_security``. Otherwise
        ``verified`` carries the outcome: on a match the session moves to
        LIVENESS_PENDING and a challenge phrase is returned; on a mismatch the
        remaining attempts and lock state are returned. Any unexpected error
        produces an HTTP 500 response.
    """
    session = get_session(session_id)
    if not session:
        return JSONResponse(content={"success": False, "message": "Session expired or not found. Start a new session."})
    if session["step"] != SESSION_STEPS['STARTED']:
        return JSONResponse(content={"success": False, "message": f"Invalid step. Current step: {session['step']}"})
    uid = session["user_id"]
    allowed, sec_msg = check_security(uid)
    if not allowed:
        session["step"] = SESSION_STEPS['DENIED']
        return JSONResponse(content={"success": False, "message": sec_msg, "locked": True})
    try:
        audio_bytes = await audio.read()
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                # Record the path first so a failed write is still cleaned up.
                tmp_path = tmp.name
                tmp.write(audio_bytes)
            wav = load_audio(tmp_path)
        finally:
            # delete=False leaves removal to us; do it even when decoding fails
            # so rejected uploads do not pile up on disk.
            if tmp_path is not None:
                os.unlink(tmp_path)
        is_live, live_msg = check_liveness(wav)
        if not is_live:
            record_attempt(uid, False)
            return JSONResponse(content={"success": True, "verified": False, "message": live_msg})
        is_real, spoof_msg = check_antispoofing(wav)
        if not is_real:
            record_attempt(uid, False)
            return JSONResponse(content={"success": True, "verified": False, "message": spoof_msg})
        test_emb = extract_embedding(wav)
        db = load_db()
        stored_emb = np.array(db[uid]["voiceprint"])
        # Cosine similarity; the epsilon guards against a zero-norm embedding.
        similarity = float(np.dot(test_emb, stored_emb) / (np.linalg.norm(test_emb) * np.linalg.norm(stored_emb) + 1e-10))
        if similarity >= THRESHOLD:
            record_attempt(uid, True)
            full_name = db[uid].get("full_name", uid)
            challenge = generate_challenge()
            session["step"] = SESSION_STEPS['LIVENESS_PENDING']
            session["full_name"] = full_name
            session["similarity"] = round(similarity, 4)
            session["challenge_phrase"] = challenge
            return JSONResponse(content={
                "success": True,
                "verified": True,
                "greeting": f"Welcome, {full_name}",
                "full_name": full_name,
                "similarity": round(similarity, 4),
                "next_step": "liveness",
                "challenge_phrase": challenge,
                "instruction": f"Say these words: {challenge}",
                "message": f"Voice verified. Welcome, {full_name}. For security, please say these words: {challenge}"
            })
        else:
            record_attempt(uid, False)
            # Read the tracker after recording so the count includes this failure.
            tracker = attempt_tracker.get(uid, {})
            attempts_remaining = max(0, MAX_ATTEMPTS - tracker.get("count", 0))
            locked = attempts_remaining == 0
            if locked:
                session["step"] = SESSION_STEPS['DENIED']
            return JSONResponse(content={
                "success": True,
                "verified": False,
                "similarity": round(similarity, 4),
                "attempts_remaining": attempts_remaining,
                "locked": locked,
                "message": f"Voice does not match. {attempts_remaining} attempt(s) remaining." if not locked else f"Account locked for {LOCKOUT_MINUTES} minutes."
            })
    except Exception as e:
        return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
+# Step 3: Liveness check (verify challenge phrase voice)
@fastapi_app.post("/api/session/liveness")
async def session_liveness(audio: UploadFile = File(...), session_id: str = Form(...)):
    """Step 3 of the session flow: re-check the speaker on the challenge recording.

    Args:
        audio: Uploaded recording made in response to the challenge phrase; it
            is written to a temporary ``.wav`` file for ``load_audio``.
        session_id: Identifier of a session in the LIVENESS_PENDING step.

    Returns:
        JSONResponse. ``success`` is False when the session is missing or in
        another step. Otherwise ``liveness_passed`` carries the outcome; on a
        pass the session moves to AUTHENTICATED, on a voice mismatch the same
        challenge phrase is returned for another try. Any unexpected error
        produces an HTTP 500 response.

    NOTE(review): no failure path here calls ``record_attempt``, so retries in
    this step do not count toward lockout; confirm that is intended.
    NOTE(review): the recording is never compared with
    ``session["challenge_phrase"]`` in this function; only the liveness,
    anti-spoofing and speaker-similarity checks run.
    """
    session = get_session(session_id)
    if not session:
        return JSONResponse(content={"success": False, "message": "Session expired or not found."})
    if session["step"] != SESSION_STEPS['LIVENESS_PENDING']:
        return JSONResponse(content={"success": False, "message": f"Invalid step. Current step: {session['step']}"})
    uid = session["user_id"]
    try:
        audio_bytes = await audio.read()
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                # Record the path first so a failed write is still cleaned up.
                tmp_path = tmp.name
                tmp.write(audio_bytes)
            wav = load_audio(tmp_path)
        finally:
            # delete=False leaves removal to us; do it even when decoding fails
            # so rejected uploads do not pile up on disk.
            if tmp_path is not None:
                os.unlink(tmp_path)
        is_live, live_msg = check_liveness(wav)
        if not is_live:
            return JSONResponse(content={"success": True, "liveness_passed": False, "message": live_msg})
        is_real, spoof_msg = check_antispoofing(wav)
        if not is_real:
            return JSONResponse(content={"success": True, "liveness_passed": False, "message": spoof_msg})
        # Verify it's still the same person speaking
        test_emb = extract_embedding(wav)
        db = load_db()
        stored_emb = np.array(db[uid]["voiceprint"])
        # Cosine similarity; the epsilon guards against a zero-norm embedding.
        similarity = float(np.dot(test_emb, stored_emb) / (np.linalg.norm(test_emb) * np.linalg.norm(stored_emb) + 1e-10))
        if similarity >= THRESHOLD:
            session["step"] = SESSION_STEPS['AUTHENTICATED']
            full_name = session["full_name"]
            return JSONResponse(content={
                "success": True,
                "liveness_passed": True,
                "authenticated": True,
                "full_name": full_name,
                "similarity": round(similarity, 4),
                "next_step": "transaction",
                "instruction": "How much would you like to withdraw?",
                "message": f"Liveness confirmed. You are fully authenticated, {full_name}. How much would you like to withdraw?"
            })
        else:
            return JSONResponse(content={
                "success": True,
                "liveness_passed": False,
                "message": "Voice mismatch during liveness check. Please try again.",
                "challenge_phrase": session["challenge_phrase"],
                "instruction": f"Please say these words again: {session['challenge_phrase']}"
            })
    except Exception as e:
        return JSONResponse(status_code=500, content={"success": False, "message": f"Server error: {str(e)}"})
+# Step 4: Transaction (simulated)
@fastapi_app.post("/api/session/transaction")
async def session_transaction(session_id: str = Form(...), amount: str = Form(...)):
    """Step 4 of the session flow: confirm a (simulated) withdrawal.

    Args:
        session_id: Identifier of a session in the AUTHENTICATED step.
        amount: Withdrawal amount as submitted by the client. It must parse as
            a positive, finite number; the original text is echoed back.

    Returns:
        JSONResponse. ``success`` is False when the session is missing, is not
        authenticated, or the amount is invalid. On approval the session is
        marked COMPLETE and a confirmation message is returned.
    """
    session = get_session(session_id)
    if not session:
        return JSONResponse(content={"success": False, "message": "Session expired or not found."})
    if session["step"] != SESSION_STEPS['AUTHENTICATED']:
        return JSONResponse(content={"success": False, "message": f"Not authenticated. Current step: {session['step']}"})

    # Reject empty, non-numeric, non-positive, NaN or infinite amounts before
    # approving anything. The session stays AUTHENTICATED so the client can
    # resubmit a corrected amount.
    try:
        amount_value = float(amount.strip())
    except ValueError:
        amount_value = None
    # The chained comparison is False for NaN as well as for out-of-range values.
    if amount_value is None or not (0 < amount_value < float("inf")):
        return JSONResponse(content={"success": False, "message": "Invalid amount. Please provide a positive number."})

    full_name = session["full_name"]
    session["step"] = SESSION_STEPS['COMPLETE']
    return JSONResponse(content={
        "success": True,
        "transaction_approved": True,
        "full_name": full_name,
        "amount": amount,
        "message": f"Transaction approved. {full_name}, you are withdrawing {amount} cedis. Please collect your cash.",
        "instruction": "Transaction complete. Session ended.",
        "note": "In production, this step communicates with the bank's core system to process the actual withdrawal."
    })
+# Get session status
@fastapi_app.get("/api/session/{session_id}")
async def session_status(session_id: str):
    """Return the current state of a session.

    Args:
        session_id: Path parameter identifying the session.

    Returns:
        JSONResponse with ``success`` False when ``get_session`` finds nothing,
        otherwise the session's id, user, step, name, challenge phrase and
        creation/expiry values.
    """
    current = get_session(session_id)
    if not current:
        return JSONResponse(content={"success": False, "message": "Session expired or not found."})

    # Fields copied verbatim from the session record, in response order.
    exposed_fields = (
        "session_id",
        "user_id",
        "step",
        "full_name",
        "challenge_phrase",
        "created_at",
        "expires_at",
    )
    body = {"success": True}
    for field in exposed_fields:
        body[field] = current[field]
    return JSONResponse(content=body)
+# Launch
 # Script entry point: when run directly, start the Gradio app listening on
 # 0.0.0.0 (all IPv4 interfaces) at port 7860.
 # NOTE(review): only `demo` is launched here; confirm the FastAPI routes
 # defined on `fastapi_app` are attached to the served app elsewhere.
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)