Update evaluate.py
Browse files - evaluate.py +23 -25
evaluate.py
CHANGED
|
@@ -309,7 +309,7 @@ def handle_upload_reference(request, reference_dir, sample_rate):
|
|
| 309 |
return jsonify({"error": f"Internal server error: {str(e)}"}), 500
|
| 310 |
|
| 311 |
def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
| 312 |
-
"""Handle pronunciation evaluation requests"""
|
| 313 |
request_id = f"req-{id(request)}" # Create unique ID for this request
|
| 314 |
logger.info(f"[{request_id}] π Starting new pronunciation evaluation request")
|
| 315 |
|
|
@@ -415,18 +415,18 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
|
| 415 |
logger.error(f"[{request_id}] β ASR inference failed: {str(e)}")
|
| 416 |
return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
|
| 417 |
|
| 418 |
-
# Process reference files
|
| 419 |
-
|
|
|
|
|
|
|
|
|
|
| 420 |
results = []
|
| 421 |
-
|
| 422 |
-
best_reference = None
|
| 423 |
-
best_transcription = None
|
| 424 |
-
|
| 425 |
# Use this if you want to limit the number of files to process
|
| 426 |
-
max_files_to_check = min(5, len(reference_files))
|
| 427 |
reference_files = reference_files[:max_files_to_check]
|
| 428 |
|
| 429 |
-
logger.info(f"[{request_id}] π Processing {len(reference_files)} reference files in
|
| 430 |
|
| 431 |
# Function to process a single reference file
|
| 432 |
def process_reference_file(ref_file):
|
|
@@ -472,22 +472,20 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
|
| 472 |
"error": str(e)
|
| 473 |
}
|
| 474 |
|
| 475 |
-
# Process files
|
| 476 |
-
with ThreadPoolExecutor(max_workers=
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
logger.info(f"[{request_id}] π Found excellent match: {best_score:.2f}%")
|
| 490 |
-
break
|
| 491 |
|
| 492 |
# Clean up temp files
|
| 493 |
try:
|
|
|
|
| 309 |
return jsonify({"error": f"Internal server error: {str(e)}"}), 500
|
| 310 |
|
| 311 |
def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
| 312 |
+
"""Handle pronunciation evaluation requests with optimized parallel comparison"""
|
| 313 |
request_id = f"req-{id(request)}" # Create unique ID for this request
|
| 314 |
logger.info(f"[{request_id}] π Starting new pronunciation evaluation request")
|
| 315 |
|
|
|
|
| 415 |
logger.error(f"[{request_id}] β ASR inference failed: {str(e)}")
|
| 416 |
return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
|
| 417 |
|
| 418 |
+
# OPTIMIZATION: Process all reference files at once
|
| 419 |
+
import multiprocessing
|
| 420 |
+
|
| 421 |
+
# Determine optimal number of workers based on CPU count
|
| 422 |
+
max_workers = min(multiprocessing.cpu_count(), len(reference_files))
|
| 423 |
results = []
|
| 424 |
+
|
|
|
|
|
|
|
|
|
|
| 425 |
# Use this if you want to limit the number of files to process
|
| 426 |
+
max_files_to_check = min(len(reference_files), 10) # Increased from 5 to 10
|
| 427 |
reference_files = reference_files[:max_files_to_check]
|
| 428 |
|
| 429 |
+
logger.info(f"[{request_id}] π Processing {len(reference_files)} reference files in parallel with {max_workers} workers")
|
| 430 |
|
| 431 |
# Function to process a single reference file
|
| 432 |
def process_reference_file(ref_file):
|
|
|
|
| 472 |
"error": str(e)
|
| 473 |
}
|
| 474 |
|
| 475 |
+
# OPTIMIZATION: Process all files simultaneously using ThreadPoolExecutor
|
| 476 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 477 |
+
results = list(executor.map(process_reference_file, reference_files))
|
| 478 |
+
|
| 479 |
+
# Find the best result after all processing is complete
|
| 480 |
+
best_score = 0
|
| 481 |
+
best_reference = None
|
| 482 |
+
best_transcription = None
|
| 483 |
+
|
| 484 |
+
for result in results:
|
| 485 |
+
if result["similarity_score"] > best_score:
|
| 486 |
+
best_score = result["similarity_score"]
|
| 487 |
+
best_reference = result["reference_file"]
|
| 488 |
+
best_transcription = result["reference_text"]
|
|
|
|
|
|
|
| 489 |
|
| 490 |
# Clean up temp files
|
| 491 |
try:
|