Update evaluate.py
Browse files- evaluate.py +126 -1
evaluate.py
CHANGED
|
@@ -814,7 +814,132 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
|
| 814 |
resume_preprocessing()
|
| 815 |
return response
|
| 816 |
|
| 817 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
def get_preprocessing_status():
|
| 819 |
"""Get the current status of reference audio preprocessing"""
|
| 820 |
global PREPROCESSING_COMPLETE, REFERENCE_CACHE, PREPROCESSING_ACTIVE, PREPROCESSING_PAUSE
|
|
|
|
| 814 |
resume_preprocessing()
|
| 815 |
return response
|
| 816 |
|
| 817 |
+
def handle_upload_reference(request, reference_dir, sample_rate):
    """Handle upload of a reference audio file and preprocess it immediately.

    The uploaded file is converted to a mono WAV at ``sample_rate``, stored
    under ``<reference_dir>/<reference_word>/`` and, when the ASR model and
    processor are available, preprocessed and added to ``REFERENCE_CACHE``.
    Background preprocessing is paused for the duration of the request and
    resumed on every exit path.

    Args:
        request: Incoming request object; ``request.files["audio"]`` and
            ``request.form["reference_word"]`` are read (presumably a Flask
            request -- confirm against the caller).
        reference_dir: Directory holding one sub-directory per reference
            word. If it does not exist, ``/tmp/reference_audios`` is used.
        sample_rate: Target frame rate passed to the audio conversion and to
            ``preprocess_reference_file``.

    Returns:
        On success, the ``jsonify`` response describing the stored file. On
        failure, a ``(response, status)`` tuple with status 400 for invalid
        input or 500 for processing errors.
    """
    global REFERENCE_CACHE

    # Pause preprocessing while handling the user request. The ``finally``
    # clause at the bottom resumes it no matter how this function exits.
    pause_preprocessing()

    try:
        if "audio" not in request.files:
            logger.warning("⚠️ Reference upload missing audio file")
            return jsonify({"error": "No audio file uploaded"}), 400

        reference_word = request.form.get("reference_word", "").strip()
        if not reference_word:
            logger.warning("⚠️ Reference upload missing reference word")
            return jsonify({"error": "No reference word provided"}), 400

        # Validate reference word against the closed set of known patterns.
        reference_patterns = [
            "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
            "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
            "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
            "wa", "ali", "tuknang", "lagwa", "galo", "buri_ke_ini", "tara_na",
            "nokarin_ka_ibat", "nokarin_ka_munta", "atiu_na_ku", "nanung_panayan_mu",
            "mako_na_ka", "muli_ta_na", "nanu_ing_pengan_mu", "mekeni", "mengan_na_ka",
            "munta_ka_karin", "magkanu_ini", "mimingat_ka", "mangan_ta_na", "lakwan_da_ka",
            "nanu_maliari_kung_daptan_keka", "pilan_na_ka_banwa", "saliwan_ke_ini",
            "makananu_munta_king", "adwa", "anam", "apat", "apulu", "atlu", "dinalan", "libu", "lima",
            "metung", "pitu", "siyam", "walu", "masala", "madalumdum", "maragul", "marimla", "malagu", "marok", "mababa", "malapit", "matuling", "maputi",
            "arung", "asbuk", "balugbug", "bitis", "buntuk", "butit", "gamat", "kuku", "salu", "tud",
            "pisan", "dara", "achi", "apu", "ima", "tatang", "pengari", "koya", "kapatad", "wali",
            "pasbul", "awang", "dagis", "bale", "ulas", "sambra", "sulu", "pitudturan", "luklukan", "ulnan"
        ]

        if reference_word not in reference_patterns:
            logger.warning(f"⚠️ Invalid reference word: {reference_word}")
            return jsonify({"error": f"Invalid reference word. Available: {reference_patterns}"}), 400

        # Make sure we have a writable reference directory
        if not os.path.exists(reference_dir):
            reference_dir = os.path.join('/tmp', 'reference_audios')
            os.makedirs(reference_dir, exist_ok=True)
            logger.warning(f"⚠️ Using alternate reference directory for upload: {reference_dir}")

        # Create directory for reference pattern if it doesn't exist
        pattern_dir = os.path.join(reference_dir, reference_word)
        os.makedirs(pattern_dir, exist_ok=True)

        # Sanitise the client-supplied name; a missing name is treated as empty.
        audio_file = request.files["audio"]
        filename = secure_filename(audio_file.filename or "")

        # Ensure filename has .wav extension. If sanitising left nothing,
        # fall back to the reference word so we never write a hidden ".wav"
        # file that the "*.wav" glob below would not count.
        if not filename.lower().endswith('.wav'):
            base_name = os.path.splitext(filename)[0] or reference_word
            filename = f"{base_name}.wav"

        file_path = os.path.join(pattern_dir, filename)

        # Stage the raw upload in a temporary file, then convert it to WAV.
        temp_path = _save_upload_to_temp(audio_file)

        # Only reported as True once a result is actually stored in the cache.
        preprocessed = False

        try:
            # Normalise to the target sample rate and a single channel.
            audio = AudioSegment.from_file(temp_path)
            audio = audio.set_frame_rate(sample_rate).set_channels(1)
            audio.export(file_path, format="wav")
            logger.info(f"✅ Reference audio saved successfully for {reference_word}: {file_path}")

            # Immediately preprocess this new reference file and add to cache
            asr_model = get_asr_model()
            asr_processor = get_asr_processor()

            if asr_model and asr_processor:
                # Initialize cache for this pattern if needed
                if reference_word not in REFERENCE_CACHE:
                    REFERENCE_CACHE[reference_word] = {}

                result = preprocess_reference_file(file_path, sample_rate, asr_model, asr_processor)
                if result:
                    REFERENCE_CACHE[reference_word][filename] = result
                    preprocessed = True
                    logger.info(f"✅ New reference audio preprocessed and added to cache: {filename}")

        except Exception as e:
            logger.error(f"❌ Reference audio processing failed: {str(e)}")
            return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
        finally:
            # Remove the staged upload on success and on failure alike.
            _remove_temp_file(temp_path)

        # Count how many references we have now
        references = glob.glob(os.path.join(pattern_dir, "*.wav"))

        return jsonify({
            "message": "Reference audio uploaded successfully",
            "reference_word": reference_word,
            "file": filename,
            "total_references": len(references),
            "preprocessed": preprocessed
        })

    except Exception as e:
        logger.error(f"❌ Unhandled exception in reference upload: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
    finally:
        # Make sure to resume preprocessing on every exit path
        resume_preprocessing()


def _save_upload_to_temp(audio_file):
    """Write an uploaded file to a new temporary path and return that path.

    The temporary file is removed again if saving fails, so the caller only
    owns the path when this function returns normally.
    """
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_path = temp_file.name

    try:
        audio_file.save(temp_path)
    except Exception:
        _remove_temp_file(temp_path)
        raise
    return temp_path


def _remove_temp_file(path):
    """Delete a temporary file on a best-effort basis."""
    try:
        os.unlink(path)
    except OSError as e:
        # Cleanup failure must not fail the request; leave a trace instead.
        logger.debug(f"Could not remove temporary file {path}: {e}")
|
| 941 |
+
|
| 942 |
+
# Add a new function to get preprocessing status
|
| 943 |
def get_preprocessing_status():
|
| 944 |
"""Get the current status of reference audio preprocessing"""
|
| 945 |
global PREPROCESSING_COMPLETE, REFERENCE_CACHE, PREPROCESSING_ACTIVE, PREPROCESSING_PAUSE
|