Spaces:

cdactvm
/

Tamil_ASR_Demo

Sleeping

App Files Files Community

cdactvm committed on Jan 13, 2025

Commit

2f45a75

verified ·

1 Parent(s): 2deb19e

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -1

app.py CHANGED Viewed

@@ -31,7 +31,7 @@ from waveletDenoise import wavelet_denoise
 from scipy.signal import butter, lfilter, wiener
 asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
 # Function to apply a high-pass filter
 def high_pass_filter(audio, sr, cutoff=300):
     nyquist = 0.5 * sr
@@ -52,6 +52,101 @@ def wavelet_denoise(audio, wavelet='db1', level=1):
 def apply_wiener_filter(audio):
     return wiener(audio)
 # Function to handle speech recognition
 def recognize_speech(audio_file):
     audio, sr = librosa.load(audio_file, sr=16000)
@@ -61,6 +156,7 @@ def recognize_speech(audio_file):
     result = asr_model(denoised_audio)
     text_value = result['text']
     cleaned_text = text_value.replace("<s>", "")
     # converted_to_list = convert_to_list(cleaned_text, text_to_list())
     # processed_doubles = process_doubles(converted_to_list)
     # replaced_words = replace_words(processed_doubles)

 from scipy.signal import butter, lfilter, wiener
 asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
+# Module-level lexicon built once at import time by createlex() from a
+# tab-separated text file; it is later passed to convert2num().
+# (presumably maps Tamil number words to digit strings -- verify against the data file)
+# NOTE(review): in this diff the call sits above the hunk that adds
+# `def createlex`; unless createlex is also made available earlier in the
+# file, this line would raise NameError on import -- confirm and move the
+# assignment below the definition if so.
+lex=createlex("num_words_ta.txt")
 # Function to apply a high-pass filter
 def high_pass_filter(audio, sr, cutoff=300):
     nyquist = 0.5 * sr
 def apply_wiener_filter(audio):
+    """Return the result of scipy.signal.wiener applied to *audio* with default arguments."""
     return wiener(audio)
+def createlex(filename):
+    """Load a tab-separated lexicon file into a dictionary.
+
+    Every non-blank line must hold a key and a value separated by a tab.
+    Blank lines are skipped, and only the first tab splits the line, so
+    anything after it (including further tabs) becomes the value.
+
+    Args:
+        filename: Path of the UTF-8 encoded lexicon file.
+
+    Returns:
+        A dict mapping each key to its value, both as str. When a key is
+        repeated, the last occurrence wins.
+
+    Raises:
+        ValueError: If a non-blank line has no tab separator.
+        OSError: If the file cannot be opened.
+    """
+    data_dict = {}
+    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
+    # would otherwise become part of the first key and make it unmatchable.
+    with open(filename, "r", encoding="utf-8-sig") as f:
+        for lineno, line in enumerate(f, start=1):
+            entry = line.strip()
+            if not entry:
+                # A blank (typically trailing) line carries no entry.
+                continue
+            key, sep, value = entry.partition("\t")
+            if not sep:
+                raise ValueError(
+                    f"{filename}:{lineno}: expected '<key>\\t<value>', got {entry!r}"
+                )
+            data_dict[key] = value
+    return data_dict
+def addnum(inlist):
+    """Return the integer total of the items in *inlist*.
+
+    Each item is converted with int() before being added, so digit strings
+    and integers are both accepted. An empty iterable yields 0.
+
+    Raises:
+        ValueError: If an item cannot be converted by int().
+    """
+    # Use the builtin sum() instead of shadowing it with a local accumulator.
+    return sum(int(num) for num in inlist)
+from rapidfuzz import process
+def get_val(word, lexicon, threshold=80, length_difference=4):
+    """Look up *word* in *lexicon* using fuzzy matching on the keys.
+
+    The best-scoring key is taken from rapidfuzz's process.extractOne and
+    accepted only if its score reaches *threshold* and its length differs
+    from the length of *word* by at most *length_difference* characters.
+
+    Args:
+        word: The token to look up.
+        lexicon: Dict whose keys are the candidate spellings.
+        threshold: Minimum similarity score passed as score_cutoff.
+        length_difference: Largest allowed difference in character count
+            between *word* and the matched key.
+
+    Returns:
+        The lexicon value of the accepted key, or None when no key
+        qualifies.
+    """
+    # Pass the key view rather than the dict itself: given a mapping,
+    # extractOne scores the mapping's values instead of its keys.
+    result = process.extractOne(word, lexicon.keys(), score_cutoff=threshold)
+    if result is None:
+        return None
+    match = result[0]
+    # Reject matches whose length is too far from the query, so a short
+    # word cannot be mapped onto a much longer key (or vice versa).
+    if abs(len(match) - len(word)) > length_difference:
+        return None
+    return lexicon[match]
+def convert2num(input, lex):
+    """Replace number words in *input* with their numeric values.
+
+    The text is split on whitespace and each word is looked up with
+    get_val(). A matched word ending in 'த்து' or 'ற்று' is held back as a
+    pending addend; the next matched word is added to the pending values and
+    the total is emitted. If an unmatched word (or the end of the text)
+    follows pending addends, their total is emitted on its own first.
+    Unmatched words are copied through unchanged.
+
+    Args:
+        input: The text to convert. (The name shadows the builtin but is
+            kept because it is part of the call signature.)
+        lex: Lexicon dict handed to get_val().
+
+    Returns:
+        The converted text, with tokens separated by single spaces and no
+        leading or trailing whitespace.
+    """
+    tokens = []
+    pending = []  # values of suffix-marked number words awaiting their total
+
+    for word in input.split():
+        numval = get_val(word, lex)
+
+        if numval is None:
+            # A non-number word closes any running total before it is copied.
+            if pending:
+                tokens.append(str(addnum(pending)))
+                pending = []
+            tokens.append(word)
+        elif word.endswith(('த்து', 'ற்று')):
+            # Suffix-marked number word: hold it to be summed with what follows.
+            pending.append(numval)
+        elif pending:
+            pending.append(numval)
+            tokens.append(str(addnum(pending)))
+            pending = []
+        else:
+            tokens.append(str(numval))
+
+    # Flush explicitly at the end instead of appending a '#' sentinel word and
+    # stripping every '#' afterwards, which also deleted '#' from real text.
+    if pending:
+        tokens.append(str(addnum(pending)))
+
+    return " ".join(tokens)
 # Function to handle speech recognition
 def recognize_speech(audio_file):
     audio, sr = librosa.load(audio_file, sr=16000)
     result = asr_model(denoised_audio)
     text_value = result['text']
     cleaned_text = text_value.replace("<s>", "")
+    cleaned_text=convert2num(cleaned_text,lex)
     # converted_to_list = convert_to_list(cleaned_text, text_to_list())
     # processed_doubles = process_doubles(converted_to_list)
     # replaced_words = replace_words(processed_doubles)