Spaces:

Dionyssos
/

SHIFT

Sleeping

App Files Community

Dionyssos committed on Sep 24, 2025

Commit

2049895

1 Parent(s): 53e868b

txt ruls

Browse files

Files changed (1) hide show

textual.py +8 -80

textual.py CHANGED Viewed

@@ -3,18 +3,8 @@ import unicodedata
 from num2words import num2words
 from num2word_greek.numbers2words import convert_numbers
-def only_greek_or_only_latin(text, lang='grc'):
-    '''
-        str: The converted string in the specified target script.
-             Characters not found in any mapping are preserved as is.
-             Latin accented characters in the input (e.g., 'É', 'ü') will
-             be preserved in their lowercase form (e.g., 'é', 'ü') if
-             converting to Latin.
-    '''
-    # --- Mapping Dictionaries ---
-    # Keys are in lowercase as input text is case-folded.
-    # If the output needs to maintain original casing, additional logic is required.
     latin_to_greek_map = {
         'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
@@ -147,10 +137,7 @@ def only_greek_or_only_latin(text, lang='grc'):
                 current_index += 1
         return ''.join(output_chars)
-# =====================================================
-#
 def fix_vocals(text, lang='ron'):
@@ -177,20 +164,15 @@ def fix_vocals(text, lang='ron'):
         '<': ' mai mic decât ',
         '>': ' mai mare decât',
         '%': ' la sută ', # percent (from previous)
-        '≠': ' nu este egal cu ',
-        '≤': ' mai mic sau egal cu ',
-        '≥': ' mai mare sau egal cu ',
-        '≈': ' aproximativ ',
-        '∞': ' infinit ',
         '€': ' euro ',
         '$': ' dolar ',
         '£': ' liră ',
         '&': ' și ',  # and
-        '@': ' la ',  # at
-        '#': ' diez ',  # hash
         '∑': ' sumă ',
         '∫': ' integrală ',
-        '√': ' rădăcina pătrată a ', # more generic square root
     }
     eng_replacements = {
@@ -211,14 +193,6 @@ def fix_vocals(text, lang='ron'):
         '>': ' greater than ',
         # Additional common math symbols from previous list
         '%': ' percent ',
-        '∑': ' sum ',
-        '∫': ' integral ',
-        '√': ' square root of ',
-        '≠': ' not equals ',
-        '≤': ' less than or equals ',
-        '≥': ' greater than or equals ',
-        '≈': ' approximately ',
-        '∞': ' infinity ',
         '€': ' euro ',
         '$': ' dollar ',
         '£': ' pound ',
@@ -249,20 +223,9 @@ def fix_vocals(text, lang='ron'):
         '<': ' manje od ',
         '>': ' veće od ',
         '%': ' procenat ',
-        '∑': ' suma ',
-        '∫': ' integral ',
-        '√': ' kvadratni koren ',
-        '≠': ' nije jednako ',
-        '≤': ' manje ili jednako od ',
-        '≥': ' veće ili jednako od ',
-        '≈': ' približno ',
-        '∞': ' beskonačnost ',
         '€': ' evro ',
         '$': ' dolar ',
         '£': ' funta ',
-        '&': ' i ',
-        '@': ' et ',
-        '#': ' taraba ',
         # Others
         #     'rn': 'rrn',
         # 'ć': 'č',
@@ -312,14 +275,6 @@ def fix_vocals(text, lang='ron'):
         '>': ' größer als',
         # Additional common math symbols from previous list
         '%': ' prozent ',
-        '∑': ' Summe ',
-        '∫': ' Integral ',
-        '√': ' Quadratwurzel ',
-        '≠': ' ungleich ',
-        '≤': ' kleiner oder gleich ',
-        '≥': ' größer oder gleich ',
-        '≈': ' ungefähr ',
-        '∞': ' unendlich ',
         '€': ' euro ',
         '$': ' dollar ',
         '£': ' pfund ',
@@ -345,20 +300,11 @@ def fix_vocals(text, lang='ron'):
         '>': ' supérieur à ',
         # Add more common math symbols as needed for French
         '%': ' pour cent ',
-        '∑': ' somme ',
-        '∫': ' intégrale ',
-        '√': ' racine carrée ',
-        '≠': ' n\'égale pas ',
-        '≤': ' inférieur ou égal à ',
-        '≥': ' supérieur ou égal à ',
-        '≈': ' approximativement ',
-        '∞': ' infini ',
         '€': ' euro ',
         '$': ' dollar ',
         '£': ' livre ',
         '&': ' et ',
         '@': ' arobase ',
-        '#': ' dièse ',
     }
     hun_replacements = {
@@ -380,16 +326,7 @@ def fix_vocals(text, lang='ron'):
         'pi': ' pi ',
         '<': ' kisebb mint ',
         '>': ' nagyobb mint ',
-        # Add more common math symbols as needed for Hungarian
         '%': ' százalék ',
-        '∑': ' szumma ',
-        '∫': ' integrál ',
-        '√': ' négyzetgyök ',
-        '≠': ' nem egyenlő ',
-        '≤': ' kisebb vagy egyenlő ',
-        '≥': ' nagyobb vagy egyenlő ',
-        '≈': ' körülbelül ',
-        '∞': ' végtelen ',
         '€': ' euró ',
         '$': ' dollár ',
         '£': ' font ',
@@ -406,22 +343,13 @@ def fix_vocals(text, lang='ron'):
         '^': ' εἰς τὴν δύναμιν ',
         '+': ' σὺν ',
         ' - ': ' χωρὶς ',
-        '*': ' πολλάκις ',
         ' / ': ' διαιρέω ',
         '=': ' ἴσον ',
         'pi': ' πῖ ',
         '<': ' ἔλαττον ',
         '>': ' μεῖζον ',
-        # Add more common math symbols as needed for Ancient Greek
         '%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
-        '∑': ' ἄθροισμα ',
-        '∫': ' ὁλοκλήρωμα ',
-        '√': ' τετραγωνικὴ ῥίζα ',
-        '≠': ' οὐκ ἴσον ',
-        '≤': ' ἔλαττον ἢ ἴσον ',
-        '≥': ' μεῖζον ἢ ἴσον ',
-        '≈': ' περίπου ',
-        '∞': ' ἄπειρον ',
         '€': ' εὐρώ ',
         '$': ' δολάριον ',
         '£': ' λίρα ',
@@ -512,4 +440,4 @@ def transliterate_number(number_string,
             return match.group(0)  # Return original if conversion fails
     pattern = r'([^\d]*)(\d+(\.\d+)?([Ee][+-]?\d+)?)([^\d]*)'
-    return re.sub(pattern, replace_number, number_string)

 from num2words import num2words
 from num2word_greek.numbers2words import convert_numbers
+def only_greek_or_only_latin(text,
+                             lang='grc'):
     latin_to_greek_map = {
         'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
                 current_index += 1
         return ''.join(output_chars)
 def fix_vocals(text, lang='ron'):
         '<': ' mai mic decât ',
         '>': ' mai mare decât',
         '%': ' la sută ', # percent (from previous)
         '€': ' euro ',
         '$': ' dolar ',
         '£': ' liră ',
         '&': ' și ',  # and
+        #'@': ' la ',  # at
+        #'#': ' diez ',  # hash
         '∑': ' sumă ',
         '∫': ' integrală ',
+        #'√': ' rădăcina pătrată a ', # more generic square root
     }
     eng_replacements = {
         '>': ' greater than ',
         # Additional common math symbols from previous list
         '%': ' percent ',
         '€': ' euro ',
         '$': ' dollar ',
         '£': ' pound ',
         '<': ' manje od ',
         '>': ' veće od ',
         '%': ' procenat ',
         '€': ' evro ',
         '$': ' dolar ',
         '£': ' funta ',
         # Others
         #     'rn': 'rrn',
         # 'ć': 'č',
         '>': ' größer als',
         # Additional common math symbols from previous list
         '%': ' prozent ',
         '€': ' euro ',
         '$': ' dollar ',
         '£': ' pfund ',
         '>': ' supérieur à ',
         # Add more common math symbols as needed for French
         '%': ' pour cent ',
         '€': ' euro ',
         '$': ' dollar ',
         '£': ' livre ',
         '&': ' et ',
         '@': ' arobase ',
     }
     hun_replacements = {
         'pi': ' pi ',
         '<': ' kisebb mint ',
         '>': ' nagyobb mint ',
         '%': ' százalék ',
         '€': ' euró ',
         '$': ' dollár ',
         '£': ' font ',
         '^': ' εἰς τὴν δύναμιν ',
         '+': ' σὺν ',
         ' - ': ' χωρὶς ',
+        ' * ': ' πολλάκις ',
         ' / ': ' διαιρέω ',
         '=': ' ἴσον ',
         'pi': ' πῖ ',
         '<': ' ἔλαττον ',
         '>': ' μεῖζον ',
         '%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
         '€': ' εὐρώ ',
         '$': ' δολάριον ',
         '£': ' λίρα ',
             return match.group(0)  # Return original if conversion fails
     pattern = r'([^\d]*)(\d+(\.\d+)?([Ee][+-]?\d+)?)([^\d]*)'
+    return re.sub(pattern, replace_number, number_string)