Spaces:

Coco-18
/

Kapamtalk

Sleeping

App Files Files Community

Coco-18 committed on Apr 2, 2025

Commit

a26b533

verified ·

1 Parent(s): ceb8cef

Update evaluate.py

Browse files

Files changed (1) hide show

evaluate.py +308 -117

evaluate.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# evaluate.py - Handles evaluation and comparing tasks
 import os
 import glob
@@ -13,6 +13,9 @@ from pydub import AudioSegment
 from flask import jsonify
 from werkzeug.utils import secure_filename
 from concurrent.futures import ThreadPoolExecutor
 # Import necessary functions from translator.py
 from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES
@@ -20,9 +23,18 @@ from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES
 # Configure logging
 logger = logging.getLogger("speech_api")
-# Initialize cache at module level instead
 EVALUATION_CACHE = {}
 def calculate_similarity(text1, text2):
     """Calculate text similarity percentage."""
     def clean_text(text):
@@ -105,8 +117,130 @@ def search_reference_directories():
     return found_dirs
 def init_reference_audio(reference_dir, output_dir):
-    """Initialize reference audio directories and return the working directory path"""
     try:
         # Create the output directory first
         os.makedirs(output_dir, exist_ok=True)
@@ -179,7 +313,7 @@ def init_reference_audio(reference_dir, output_dir):
                     except Exception as e:
                         logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
-        # Log the final contents, excluding dummy files - MODIFIED HERE
         pattern_dirs = [d for d in os.listdir(working_dir)
                        if os.path.isdir(os.path.join(working_dir, d))]
@@ -191,8 +325,6 @@ def init_reference_audio(reference_dir, output_dir):
             # Count only non-dummy files
             valid_files = [f for f in wav_files if "dummy_reference" not in f]
             total_wav_files += len(valid_files)
-            # Remove the individual directory logging
-            # logger.info(f"  - {pattern}: {len(valid_files)} valid WAV files")
         logger.info(f"📊 Total pattern directories: {len(pattern_dirs)}, Total reference WAV files: {total_wav_files}")
@@ -207,6 +339,9 @@ def init_reference_audio(reference_dir, output_dir):
                 except Exception as e:
                     logger.warning(f"⚠️ Failed to remove dummy file {dummy}: {str(e)}")
         return working_dir
     except Exception as e:
@@ -225,7 +360,9 @@ def init_reference_audio(reference_dir, output_dir):
             return reference_dir
 def handle_upload_reference(request, reference_dir, sample_rate):
-    """Handle upload of reference audio files"""
     try:
         if "audio" not in request.files:
             logger.warning("⚠️ Reference upload missing audio file")
@@ -295,6 +432,22 @@ def handle_upload_reference(request, reference_dir, sample_rate):
                 os.unlink(temp_path)
             except:
                 pass
         except Exception as e:
             logger.error(f"❌ Reference audio processing failed: {str(e)}")
             return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
@@ -305,7 +458,8 @@ def handle_upload_reference(request, reference_dir, sample_rate):
             "message": "Reference audio uploaded successfully",
             "reference_word": reference_word,
             "file": filename,
-            "total_references": len(references)
         })
     except Exception as e:
@@ -314,7 +468,9 @@ def handle_upload_reference(request, reference_dir, sample_rate):
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
-    """Handle pronunciation evaluation requests with speed optimizations"""
     request_id = f"req-{id(request)}"
     logger.info(f"[{request_id}] 🆕 Starting pronunciation evaluation request")
@@ -329,7 +485,7 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
         return jsonify({"error": "ASR model not available"}), 503
     try:
-        # OPTIMIZATION 1: Check cache first for identical audio
         if "audio" not in request.files:
             logger.warning(f"[{request_id}] ⚠️ Evaluation request missing audio file")
             return jsonify({"error": "No audio file uploaded"}), 400
@@ -343,11 +499,10 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
             logger.warning(f"[{request_id}] ⚠️ No reference locator provided")
             return jsonify({"error": "Reference locator is required"}), 400
-        # OPTIMIZATION 2: Simple caching based on audio content hash + reference_locator
         audio_content = audio_file.read()
         audio_file.seek(0)  # Reset file pointer after reading
-        import hashlib
         audio_hash = hashlib.md5(audio_content).hexdigest()
         cache_key = f"{audio_hash}_{reference_locator}_{language}"
@@ -416,120 +571,121 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
         # Transcribe user audio
         try:
             logger.info(f"[{request_id}] 🔄 Transcribing user audio")
-            # Remove language parameter if causing warnings
-            inputs = asr_processor(
-                user_waveform,
-                sampling_rate=sample_rate,
-                return_tensors="pt"
-            )
-            inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                logits = asr_model(**inputs).logits
-            ids = torch.argmax(logits, dim=-1)[0]
-            user_transcription = asr_processor.decode(ids)
             logger.info(f"[{request_id}] ✅ User transcription: '{user_transcription}'")
         except Exception as e:
             logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
             return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
-        # OPTIMIZATION 3: Use a smaller sample of reference files
-        import multiprocessing
-        import random
-        # OPTIMIZATION 4: Limit to just a few files for initial comparison
-        # If we have many reference files, randomly sample some for quick evaluation
-        if len(reference_files) > 3:
-            # Randomly select 3 files for faster comparison
-            reference_files_sample = random.sample(reference_files, 3)
-        else:
-            reference_files_sample = reference_files
-        # Determine optimal number of workers based on CPU count (but keep it small)
-        max_workers = min(multiprocessing.cpu_count(), len(reference_files_sample), 3)
-        initial_results = []
-        logger.info(f"[{request_id}] 🔄 Quick scan: processing {len(reference_files_sample)} reference files")
-        # Function to process a single reference file
-        def process_reference_file(ref_file):
-            ref_filename = os.path.basename(ref_file)
-            try:
-                # Load and resample reference audio
-                ref_waveform, ref_sr = torchaudio.load(ref_file)
-                if ref_sr != sample_rate:
-                    ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
-                ref_waveform = ref_waveform.squeeze().numpy()
-                # Transcribe reference audio
-                inputs = asr_processor(
-                    ref_waveform,
-                    sampling_rate=sample_rate,
-                    return_tensors="pt"
-                )
-                inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
-                with torch.no_grad():
-                    logits = asr_model(**inputs).logits
-                ids = torch.argmax(logits, dim=-1)[0]
-                ref_transcription = asr_processor.decode(ids)
-                # Calculate similarity
                 similarity = calculate_similarity(user_transcription, ref_transcription)
                 logger.info(
                     f"[{request_id}] 📊 Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
-                return {
                     "reference_file": ref_filename,
                     "reference_text": ref_transcription,
                     "similarity_score": similarity
-                }
-            except Exception as e:
-                logger.error(f"[{request_id}] ❌ Error processing {ref_filename}: {str(e)}")
-                return {
-                    "reference_file": ref_filename,
-                    "reference_text": "Error",
-                    "similarity_score": 0,
-                    "error": str(e)
-                }
-        # Process the sample files in parallel
-        with ThreadPoolExecutor(max_workers=max_workers) as executor:
-            initial_results = list(executor.map(process_reference_file, reference_files_sample))
-        # Find the best result from the initial sample
-        best_score = 0
-        best_reference = None
-        best_transcription = None
-        for result in initial_results:
-            if result["similarity_score"] > best_score:
-                best_score = result["similarity_score"]
-                best_reference = result["reference_file"]
-                best_transcription = result["reference_text"]
-        # OPTIMIZATION 5: If we already found a very good match, don't process more files
-        all_results = initial_results.copy()
-        remaining_files = [f for f in reference_files if f not in reference_files_sample]
-        # Only process more files if our best score isn't already very good
-        if best_score < 80.0 and remaining_files:
-            logger.info(f"[{request_id}] 🔄 Score {best_score:.2f}% not high enough, checking {len(remaining_files)} more references")
-            # Process remaining files
             with ThreadPoolExecutor(max_workers=max_workers) as executor:
-                additional_results = list(executor.map(process_reference_file, remaining_files[:5]))  # Process max 5 more
-                all_results.extend(additional_results)
-                # Update best result if we found a better one
-                for result in additional_results:
-                    if result["similarity_score"] > best_score:
-                        best_score = result["similarity_score"]
-                        best_reference = result["reference_file"]
-                        best_transcription = result["reference_text"]
         # Clean up temp files
         try:
             if temp_dir and os.path.exists(temp_dir):
@@ -537,6 +693,20 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
                 logger.debug(f"[{request_id}] 🧹 Cleaned up temporary directory")
         except Exception as e:
             logger.warning(f"[{request_id}] ⚠️ Failed to clean up temp files: {str(e)}")
         # Determine feedback based on score
         is_correct = best_score >= 70.0
@@ -554,11 +724,8 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
         logger.info(f"[{request_id}] 📊 Final evaluation results: score={best_score:.2f}%, is_correct={is_correct}")
         logger.info(f"[{request_id}] 📝 Feedback: '{feedback}'")
-        logger.info(f"[{request_id}] ✅ Evaluation complete")
-        # Sort results by score descending
-        all_results.sort(key=lambda x: x["similarity_score"], reverse=True)
         # Create response
         response = jsonify({
             "is_correct": is_correct,
@@ -570,10 +737,11 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
             "details": all_results,
             "total_references_compared": len(all_results),
             "total_available_references": len(reference_files),
-            "quick_evaluation": True
         })
-        # OPTIMIZATION 6: Cache the result for future requests using module-level cache
         MAX_CACHE_SIZE = 50
         EVALUATION_CACHE[cache_key] = response
         if len(EVALUATION_CACHE) > MAX_CACHE_SIZE:
@@ -593,4 +761,27 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
         except:
             pass
-        return jsonify({"error": f"Internal server error: {str(e)}"}), 500

+# evaluate.py - Handles evaluation and comparing tasks with reference preprocessing
 import os
 import glob
 from flask import jsonify
 from werkzeug.utils import secure_filename
 from concurrent.futures import ThreadPoolExecutor
+import hashlib
+import threading
+import time
 # Import necessary functions from translator.py
 from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES
 # Configure logging
 logger = logging.getLogger("speech_api")
# Preprocessed reference audio, shared by the preprocessing helpers and the
# request handlers in this module.
# Layout: {reference_locator: {wav_basename: {"waveform": ...,
#                                             "transcription": ...,
#                                             "processed_at": ...}}}
REFERENCE_CACHE = {}
# Responses for identical evaluation requests, keyed by
# "<audio md5>_<reference_locator>_<language>".
EVALUATION_CACHE = {}
# Becomes True once preprocess_all_references() has finished a full pass.
PREPROCESSING_COMPLETE = False
# Held while PREPROCESSING_COMPLETE is read or written.
PREPROCESSING_LOCK = threading.Lock()
# Thread object created by start_preprocessing_thread(); None until then.
PREPROCESSING_THREAD = None
 def calculate_similarity(text1, text2):
     """Calculate text similarity percentage."""
     def clean_text(text):
     return found_dirs
def transcribe_audio(waveform, sample_rate, asr_model, asr_processor):
    """Transcribe one waveform with the given ASR model and processor.

    Args:
        waveform: Audio samples, handed to ``asr_processor`` unchanged.
        sample_rate: Sampling rate, forwarded as ``sampling_rate``.
        asr_model: Callable model; must expose ``.device`` and return an
            object carrying a ``.logits`` tensor.
        asr_processor: Callable feature extractor that also provides
            ``decode(ids)``.

    Returns:
        The result of ``asr_processor.decode`` applied to the arg-max
        token ids of the first item in the batch.
    """
    features = asr_processor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
    )
    # Every input tensor has to live on the same device as the model.
    features = {name: tensor.to(asr_model.device) for name, tensor in features.items()}
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = asr_model(**features).logits
    # Greedy decoding: most likely token per frame, first batch item only.
    token_ids = torch.argmax(logits, dim=-1)[0]
    return asr_processor.decode(token_ids)
def preprocess_reference_file(ref_file, sample_rate, asr_model, asr_processor):
    """Load, resample and transcribe a single reference recording.

    Args:
        ref_file: Path to the reference audio file.
        sample_rate: Target sampling rate; the audio is resampled when the
            file's own rate differs.
        asr_model: ASR model forwarded to ``transcribe_audio``.
        asr_processor: ASR processor forwarded to ``transcribe_audio``.

    Returns:
        A dict with ``"waveform"`` (numpy array), ``"transcription"`` and
        ``"processed_at"`` (a ``time.time()`` stamp), or ``None`` when any
        step fails. Failures are logged, never raised.
    """
    ref_filename = os.path.basename(ref_file)
    try:
        ref_waveform, ref_sr = torchaudio.load(ref_file)
        if ref_sr != sample_rate:
            ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
        # torchaudio.load yields (channels, frames). A bare squeeze() only
        # flattens mono input, so a multi-channel file would reach the ASR
        # processor as a 2-D array. Average the channels instead; for mono
        # input this gives the same samples squeeze() did.
        if ref_waveform.dim() > 1:
            ref_waveform = ref_waveform.mean(dim=0)
        ref_waveform = ref_waveform.numpy()

        ref_transcription = transcribe_audio(ref_waveform, sample_rate, asr_model, asr_processor)
        logger.debug(f"Preprocessed reference file: {ref_filename}, transcription: '{ref_transcription}'")
        return {
            "waveform": ref_waveform,
            "transcription": ref_transcription,
            "processed_at": time.time(),
        }
    except Exception as e:
        # Best effort: one unreadable file must not abort the caller's batch.
        logger.error(f"❌ Error preprocessing {ref_filename}: {str(e)}")
        return None
def preprocess_all_references(reference_dir, sample_rate=16000):
    """Transcribe every reference WAV under ``reference_dir`` into the cache.

    Each immediate sub-directory of ``reference_dir`` is treated as one
    reference pattern. Its ``*.wav`` files (paths containing
    ``"dummy_reference"`` are skipped) are preprocessed in a small thread
    pool and stored in ``REFERENCE_CACHE[pattern]`` keyed by file basename.

    Args:
        reference_dir: Directory whose sub-directories hold reference audio.
        sample_rate: Sampling rate passed to ``preprocess_reference_file``.

    Returns:
        True when the pass finished (``PREPROCESSING_COMPLETE`` is then set),
        False when the ASR model/processor is unavailable or an unexpected
        error interrupted the pass.
    """
    global PREPROCESSING_COMPLETE
    logger.info("🚀 Starting preprocessing of all reference audio files...")

    asr_model = get_asr_model()
    asr_processor = get_asr_processor()
    if asr_model is None or asr_processor is None:
        logger.error("❌ Cannot preprocess reference audio - ASR models not loaded")
        return False

    try:
        pattern_dirs = [d for d in os.listdir(reference_dir)
                        if os.path.isdir(os.path.join(reference_dir, d))]
        total_processed = 0
        start_time = time.time()

        for pattern in pattern_dirs:
            pattern_path = os.path.join(reference_dir, pattern)
            reference_files = [
                f for f in glob.glob(os.path.join(pattern_path, "*.wav"))
                if "dummy_reference" not in f
            ]
            if not reference_files:
                continue

            # setdefault is a single dict operation, so a concurrent writer
            # that created this entry first keeps its data instead of being
            # replaced by a fresh empty dict.
            pattern_cache = REFERENCE_CACHE.setdefault(pattern, {})
            logger.info(f"🔄 Preprocessing {len(reference_files)} references for pattern: {pattern}")

            max_workers = min(os.cpu_count() or 4, len(reference_files), 5)
            # Collect locally and publish once per pattern: request handlers
            # treat any non-empty cache entry as usable, so inserting file by
            # file would let them score against a partial reference set.
            pattern_results = {}
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                tasks = {
                    executor.submit(preprocess_reference_file, ref_file, sample_rate,
                                    asr_model, asr_processor): ref_file
                    for ref_file in reference_files
                }
                for future, ref_file in tasks.items():
                    try:
                        result = future.result()
                    except Exception as e:
                        logger.error(f"❌ Failed to process {ref_file}: {str(e)}")
                        continue
                    if result:
                        pattern_results[os.path.basename(ref_file)] = result

            pattern_cache.update(pattern_results)
            total_processed += len(pattern_results)

        elapsed_time = time.time() - start_time
        logger.info(f"✅ Preprocessing complete! Processed {total_processed} reference files in {elapsed_time:.2f} seconds")
        with PREPROCESSING_LOCK:
            PREPROCESSING_COMPLETE = True
        return True
    except Exception as e:
        logger.error(f"❌ Error during reference preprocessing: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")
        return False
def start_preprocessing_thread(reference_dir, sample_rate=16000):
    """Run ``preprocess_all_references`` in a background daemon thread.

    The new thread object is stored in the module-level
    ``PREPROCESSING_THREAD``; every call starts a fresh thread and replaces
    the previous handle.

    Args:
        reference_dir: Directory passed on to ``preprocess_all_references``.
        sample_rate: Sampling rate passed on to ``preprocess_all_references``.
    """
    global PREPROCESSING_THREAD
    PREPROCESSING_THREAD = threading.Thread(
        target=preprocess_all_references,
        args=(reference_dir, sample_rate),
        daemon=True,  # must not keep the process alive at shutdown
    )
    PREPROCESSING_THREAD.start()
    logger.info("🧵 Started reference audio preprocessing in background thread")
 def init_reference_audio(reference_dir, output_dir):
+    """Initialize reference audio directories and start preprocessing"""
     try:
         # Create the output directory first
         os.makedirs(output_dir, exist_ok=True)
                     except Exception as e:
                         logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
+        # Log the final contents, excluding dummy files
         pattern_dirs = [d for d in os.listdir(working_dir)
                        if os.path.isdir(os.path.join(working_dir, d))]
             # Count only non-dummy files
             valid_files = [f for f in wav_files if "dummy_reference" not in f]
             total_wav_files += len(valid_files)
         logger.info(f"📊 Total pattern directories: {len(pattern_dirs)}, Total reference WAV files: {total_wav_files}")
                 except Exception as e:
                     logger.warning(f"⚠️ Failed to remove dummy file {dummy}: {str(e)}")
+        # Start preprocessing references in background
+        start_preprocessing_thread(working_dir)
         return working_dir
     except Exception as e:
             return reference_dir
 def handle_upload_reference(request, reference_dir, sample_rate):
+    """Handle upload of reference audio files and preprocess immediately"""
+    global REFERENCE_CACHE
     try:
         if "audio" not in request.files:
             logger.warning("⚠️ Reference upload missing audio file")
                 os.unlink(temp_path)
             except:
                 pass
+            # Immediately preprocess this new reference file and add to cache
+            asr_model = get_asr_model()
+            asr_processor = get_asr_processor()
+            if asr_model and asr_processor:
+                # Initialize cache for this pattern if needed
+                if reference_word not in REFERENCE_CACHE:
+                    REFERENCE_CACHE[reference_word] = {}
+                # Preprocess and add to cache
+                result = preprocess_reference_file(file_path, sample_rate, asr_model, asr_processor)
+                if result:
+                    REFERENCE_CACHE[reference_word][filename] = result
+                    logger.info(f"✅ New reference audio preprocessed and added to cache: {filename}")
         except Exception as e:
             logger.error(f"❌ Reference audio processing failed: {str(e)}")
             return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
             "message": "Reference audio uploaded successfully",
             "reference_word": reference_word,
             "file": filename,
+            "total_references": len(references),
+            "preprocessed": True
         })
     except Exception as e:
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
+    """Handle pronunciation evaluation requests with preprocessing optimization"""
+    global REFERENCE_CACHE, PREPROCESSING_COMPLETE
     request_id = f"req-{id(request)}"
     logger.info(f"[{request_id}] 🆕 Starting pronunciation evaluation request")
         return jsonify({"error": "ASR model not available"}), 503
     try:
+        # Check for basic request requirements
         if "audio" not in request.files:
             logger.warning(f"[{request_id}] ⚠️ Evaluation request missing audio file")
             return jsonify({"error": "No audio file uploaded"}), 400
             logger.warning(f"[{request_id}] ⚠️ No reference locator provided")
             return jsonify({"error": "Reference locator is required"}), 400
+        # OPTIMIZATION: Simple caching based on audio content hash + reference_locator
         audio_content = audio_file.read()
         audio_file.seek(0)  # Reset file pointer after reading
         audio_hash = hashlib.md5(audio_content).hexdigest()
         cache_key = f"{audio_hash}_{reference_locator}_{language}"
         # Transcribe user audio
         try:
             logger.info(f"[{request_id}] 🔄 Transcribing user audio")
+            user_transcription = transcribe_audio(user_waveform, sample_rate, asr_model, asr_processor)
             logger.info(f"[{request_id}] ✅ User transcription: '{user_transcription}'")
         except Exception as e:
             logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
             return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
+        # Check if we have preprocessed data for this reference locator
+        using_preprocessed = False
+        all_results = []
+        if reference_locator in REFERENCE_CACHE and REFERENCE_CACHE[reference_locator]:
+            using_preprocessed = True
+            logger.info(f"[{request_id}] 🚀 Using preprocessed reference data for {reference_locator}")
+            # Compare with all cached references
+            for ref_filename, ref_data in REFERENCE_CACHE[reference_locator].items():
+                ref_transcription = ref_data["transcription"]
                 similarity = calculate_similarity(user_transcription, ref_transcription)
                 logger.info(
                     f"[{request_id}] 📊 Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
+                all_results.append({
                     "reference_file": ref_filename,
                     "reference_text": ref_transcription,
                     "similarity_score": similarity
+                })
+        else:
+            # If not preprocessed yet, do traditional processing
+            logger.info(f"[{request_id}] ⚠️ No preprocessed data available for {reference_locator}, processing on demand")
+            # Process files in parallel with ThreadPoolExecutor
+            import random
+            import multiprocessing
+            # Determine optimal number of workers based on CPU count (but keep it small)
+            max_workers = min(multiprocessing.cpu_count(), len(reference_files), 3)
+            # Function to process a single reference file
+            def process_reference_file(ref_file):
+                ref_filename = os.path.basename(ref_file)
+                try:
+                    # Load and resample reference audio
+                    ref_waveform, ref_sr = torchaudio.load(ref_file)
+                    if ref_sr != sample_rate:
+                        ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
+                    ref_waveform = ref_waveform.squeeze().numpy()
+                    # Transcribe reference audio
+                    ref_transcription = transcribe_audio(ref_waveform, sample_rate, asr_model, asr_processor)
+                    # Add to cache for future use
+                    if reference_locator not in REFERENCE_CACHE:
+                        REFERENCE_CACHE[reference_locator] = {}
+                    REFERENCE_CACHE[reference_locator][ref_filename] = {
+                        "waveform": ref_waveform,
+                        "transcription": ref_transcription,
+                        "processed_at": time.time()
+                    }
+                    # Calculate similarity
+                    similarity = calculate_similarity(user_transcription, ref_transcription)
+                    logger.info(
+                        f"[{request_id}] 📊 Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
+                    return {
+                        "reference_file": ref_filename,
+                        "reference_text": ref_transcription,
+                        "similarity_score": similarity
+                    }
+                except Exception as e:
+                    logger.error(f"[{request_id}] ❌ Error processing {ref_filename}: {str(e)}")
+                    return {
+                        "reference_file": ref_filename,
+                        "reference_text": "Error",
+                        "similarity_score": 0,
+                        "error": str(e)
+                    }
+            # If we have many files, select a smaller sample for initial quick evaluation
+            if len(reference_files) > 3 and not using_preprocessed:
+                reference_files_sample = random.sample(reference_files, 3)
+            else:
+                reference_files_sample = reference_files
+            logger.info(f"[{request_id}] 🔄 Processing {len(reference_files_sample)} reference files")
+            # Process the files in parallel
             with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                initial_results = list(executor.map(process_reference_file, reference_files_sample))
+                all_results = initial_results.copy()
+                # If we didn't process all files and didn't find a good match, process more
+                if len(reference_files_sample) < len(reference_files) and not using_preprocessed:
+                    # Find the best result so far
+                    best_score = 0
+                    for result in all_results:
+                        if result["similarity_score"] > best_score:
+                            best_score = result["similarity_score"]
+                    # Only process more files if our best score isn't already very good
+                    if best_score < 80.0:
+                        remaining_files = [f for f in reference_files if f not in reference_files_sample]
+                        logger.info(f"[{request_id}] 🔄 Score {best_score:.2f}% not high enough, checking {len(remaining_files)} more references")
+                        # Limit how many additional files we process
+                        additional_files = remaining_files[:5]  # Process max 5 more
+                        # Process remaining files
+                        additional_results = list(executor.map(process_reference_file, additional_files))
+                        all_results.extend(additional_results)
         # Clean up temp files
         try:
             if temp_dir and os.path.exists(temp_dir):
                 logger.debug(f"[{request_id}] 🧹 Cleaned up temporary directory")
         except Exception as e:
             logger.warning(f"[{request_id}] ⚠️ Failed to clean up temp files: {str(e)}")
+        # Find the best result
+        best_score = 0
+        best_reference = None
+        best_transcription = None
+        # Sort results by score descending
+        all_results.sort(key=lambda x: x["similarity_score"], reverse=True)
+        if all_results:
+            best_result = all_results[0]
+            best_score = best_result["similarity_score"]
+            best_reference = best_result["reference_file"]
+            best_transcription = best_result["reference_text"]
         # Determine feedback based on score
         is_correct = best_score >= 70.0
         logger.info(f"[{request_id}] 📊 Final evaluation results: score={best_score:.2f}%, is_correct={is_correct}")
         logger.info(f"[{request_id}] 📝 Feedback: '{feedback}'")
+        logger.info(f"[{request_id}] ✅ Evaluation complete using {'preprocessed' if using_preprocessed else 'on-demand'} reference data")
         # Create response
         response = jsonify({
             "is_correct": is_correct,
             "details": all_results,
             "total_references_compared": len(all_results),
             "total_available_references": len(reference_files),
+            "used_preprocessed_data": using_preprocessed,
+            "preprocessing_complete": PREPROCESSING_COMPLETE
         })
+        # Cache the result for future identical requests
         MAX_CACHE_SIZE = 50
         EVALUATION_CACHE[cache_key] = response
         if len(EVALUATION_CACHE) > MAX_CACHE_SIZE:
         except:
             pass
+        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
def get_preprocessing_status():
    """Report the current state of reference-audio preprocessing.

    Returns:
        A dict with:
        ``"complete"`` - value of ``PREPROCESSING_COMPLETE``;
        ``"preprocessed_files"`` - number of cached reference files;
        ``"patterns_cached"`` - number of patterns in ``REFERENCE_CACHE``;
        ``"thread_running"`` - whether the preprocessing thread is alive.
    """
    with PREPROCESSING_LOCK:
        is_complete = PREPROCESSING_COMPLETE

    # Count from a snapshot: the preprocessing thread adds patterns while
    # this runs, and iterating the live dict during an insert raises
    # "dictionary changed size during iteration".
    cached_patterns = list(REFERENCE_CACHE.values())
    preprocessed_count = sum(len(files) for files in cached_patterns)

    # Read the global once so the None check and is_alive() see the same object.
    worker = PREPROCESSING_THREAD
    thread_running = worker is not None and worker.is_alive()

    return {
        "complete": is_complete,
        "preprocessed_files": preprocessed_count,
        "patterns_cached": len(cached_patterns),
        "thread_running": thread_running,
    }