Spaces:

cdactvm
/

Tamil_ASR_Demo

Sleeping

cdactvm committed on Jan 1, 2025

Commit

c298401

verified ·

1 Parent(s): 146f2c8

Update processDoubles.py

Files changed (1) hide show

processDoubles.py CHANGED Viewed

@@ -1,54 +1,25 @@
-#!/usr/bin/env python
-# coding: utf-8
-# In[2]:
-# # Function to process "double" followed by a number
-# def process_doubles(sentence):
-#     tokens = sentence.split()
-#     result = []
-#     i = 0
-#     while i < len(tokens):
-#         if tokens[i] == "डबल":
-#             if i + 1 < len(tokens):
-#                 result.append(tokens[i + 1])
-#                 result.append(tokens[i + 1])
-#                 i += 2
-#             else:
-#                 result.append(tokens[i])
-#                 i += 1
-#         else:
-#             result.append(tokens[i])
-#             i += 1
-#     return ' '.join(result)
-# In[ ]:
-import re
-def process_doubles(sentence):
-    # Use regex to split 'डबल' followed by numbers/words without space (e.g., "डबलवन" -> "डबल वन")
-    sentence = re.sub(r'(डबल)(\S+)', r'\1 \2', sentence)
-    tokens = sentence.split()
-    result = []
-    i = 0
-    while i < len(tokens):
-        if tokens[i] == "डबल":
-            if i + 1 < len(tokens):
-                result.append(tokens[i + 1])  # Append the next word/number
-                result.append(tokens[i + 1])  # Append the next word/number again to duplicate
-                i += 2  # Skip over the next word since it's already processed
-            else:
-                result.append(tokens[i])
-                i += 1
-        else:
-            result.append(tokens[i])
-            i += 1
-    return ' '.join(result)

+import re
+def process_doubles(sentence):
+    # Use regex to insert a space when 'டபுள்' ("double") is glued to the following number/word (e.g., "டபுள்வன்" -> "டபுள் வன்")
+    sentence = re.sub(r'(டபுள்)(\S+)', r'\1 \2', sentence)
+    tokens = sentence.split()
+    result = []
+    i = 0
+    while i < len(tokens):
+        if tokens[i] == "டபுள்":
+            if i + 1 < len(tokens):
+                result.append(tokens[i + 1])  # Append the next word/number
+                result.append(tokens[i + 1])  # Append the next word/number again to duplicate
+                i += 2  # Skip over the next word since it's already processed
+            else:
+                result.append(tokens[i])
+                i += 1
+        else:
+            result.append(tokens[i])
+            i += 1
+    return ' '.join(result)