Spaces:

cdactvm
/

Hindi_ASR

Build error

App Files Files Community

cdactvm committed on Sep 27, 2024

Commit

5908f54

verified ·

1 Parent(s): 62b1787

Delete text2int.ipynb

Browse files

Files changed (1) hide show

text2int.ipynb +0 -232

text2int.ipynb DELETED Viewed

@@ -1,232 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "61185b34-45e0-4a78-a84b-2cedd08ad39a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# # Function to convert spelled-out English number words in text to digits\n",
-    "# from isNumber import is_number\n",
-    "\n",
-    "# def text_to_int (textnum, numwords={}):\n",
-    "#     units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',\n",
-    "#             'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',\n",
-    "#             'sixteen', 'seventeen', 'eighteen', 'nineteen']\n",
-    "#     tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']\n",
-    "#     scales = ['hundred', 'thousand', 'lac','million', 'billion', 'trillion']\n",
-    "#     ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}\n",
-    "#     ordinal_endings = [('ieth', 'y'), ('th', '')]\n",
-    "\n",
-    "#     if not numwords:\n",
-    "#         numwords['and'] = (1, 0)\n",
-    "#         for idx, word in enumerate(units): numwords[word] = (1, idx)\n",
-    "#         for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)\n",
-    "#         for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)\n",
-    "\n",
-    "#     textnum = textnum.replace('-', ' ')\n",
-    "\n",
-    "#     current = result = 0\n",
-    "#     curstring = ''\n",
-    "#     onnumber = False\n",
-    "#     lastunit = False\n",
-    "#     lastscale = False\n",
-    "\n",
-    "#     def is_numword(x):\n",
-    "#         if is_number(x):\n",
-    "#             return True\n",
-    "#         if word in numwords:\n",
-    "#             return True\n",
-    "#         return False\n",
-    "\n",
-    "#     def from_numword(x):\n",
-    "#         if is_number(x):\n",
-    "#             scale = 0\n",
-    "#             increment = int(x.replace(',', ''))\n",
-    "#             return scale, increment\n",
-    "#         return numwords[x]\n",
-    "\n",
-    "#     for word in textnum.split():\n",
-    "#         if word in ordinal_words:\n",
-    "#             scale, increment = (1, ordinal_words[word])\n",
-    "#             current = current * scale + increment\n",
-    "#             if scale > 100:\n",
-    "#                 result += current\n",
-    "#                 current = 0\n",
-    "#             onnumber = True\n",
-    "#             lastunit = False\n",
-    "#             lastscale = False\n",
-    "#         else:\n",
-    "#             for ending, replacement in ordinal_endings:\n",
-    "#                 if word.endswith(ending):\n",
-    "#                     word = \"%s%s\" % (word[:-len(ending)], replacement)\n",
-    "\n",
-    "#             if (not is_numword(word)) or (word == 'and' and not lastscale):\n",
-    "#                 if onnumber:\n",
-    "#                     # Flush the current number we are building\n",
-    "#                     curstring += repr(result + current) + \" \"\n",
-    "#                 curstring += word + \" \"\n",
-    "#                 result = current = 0\n",
-    "#                 onnumber = False\n",
-    "#                 lastunit = False\n",
-    "#                 lastscale = False\n",
-    "#             else:\n",
-    "#                 scale, increment = from_numword(word)\n",
-    "#                 onnumber = True\n",
-    "\n",
-    "#                 if lastunit and (word not in scales):                                                                                                                                                                                                                                         \n",
-    "#                     # Assume this is part of a string of individual numbers to                                                                                                                                                                                                                \n",
-    "#                     # be flushed, such as a zipcode \"one two three four five\"                                                                                                                                                                                                                 \n",
-    "#                     curstring += repr(result + current)                                                                                                                                                                                                                                       \n",
-    "#                     result = current = 0                                                                                                                                                                                                                                                      \n",
-    "\n",
-    "#                 if scale > 1:                                                                                                                                                                                                                                                                 \n",
-    "#                     current = max(1, current)                                                                                                                                                                                                                                                 \n",
-    "\n",
-    "#                 current = current * scale + increment                                                                                                                                                                                                                                         \n",
-    "#                 if scale > 100:                                                                                                                                                                                                                                                               \n",
-    "#                     result += current                                                                                                                                                                                                                                                         \n",
-    "#                     current = 0                                                                                                                                                                                                                                                               \n",
-    "\n",
-    "#                 lastscale = False                                                                                                                                                                                                              \n",
-    "#                 lastunit = False                                                                                                                                                \n",
-    "#                 if word in scales:                                                                                                                                                                                                             \n",
-    "#                     lastscale = True                                                                                                                                                                                                         \n",
-    "#                 elif word in units:                                                                                                                                                                                                             \n",
-    "#                     lastunit = True\n",
-    "\n",
-    "#     if onnumber:\n",
-    "#         curstring += repr(result + current)\n",
-    "\n",
-    "#     return curstring\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a87b26d7-4a0e-4fdc-b03e-1537600faf65",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from isNumber import is_number  # Remove or replace this if unnecessary\n",
-    "\n",
-    "def text_to_int(textnum, numwords={}):\n",
-    "    \"\"\"Replace spelled-out English numbers in ``textnum`` with digits.\n",
-    "\n",
-    "    Words that are not part of a number are copied through unchanged. A run\n",
-    "    of bare units such as 'one two three' is emitted one number at a time\n",
-    "    ('1 2 3') instead of being summed.\n",
-    "\n",
-    "    Args:\n",
-    "        textnum: Input text; hyphens are treated as spaces.\n",
-    "        numwords: Lookup table mapping a word to ``(scale, increment)``.\n",
-    "            The shared default dict is filled on the first call and then\n",
-    "            reused as a cache; an empty dict passed by the caller is\n",
-    "            filled in the same way.\n",
-    "\n",
-    "    Returns:\n",
-    "        The converted text with surrounding whitespace stripped.\n",
-    "    \"\"\"\n",
-    "    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',\n",
-    "            'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',\n",
-    "            'sixteen', 'seventeen', 'eighteen', 'nineteen']\n",
-    "    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']\n",
-    "    # Explicit exponents: deriving them from the list position (idx * 3)\n",
-    "    # breaks once 'lac' (10**5) sits between 'thousand' and 'million',\n",
-    "    # because every later scale is shifted up by a factor of 1000.\n",
-    "    scale_exponents = {'hundred': 2, 'thousand': 3, 'lac': 5,\n",
-    "                       'million': 6, 'billion': 9, 'trillion': 12}\n",
-    "    scales = list(scale_exponents)\n",
-    "    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}\n",
-    "    ordinal_endings = [('ieth', 'y'), ('th', '')]\n",
-    "\n",
-    "    if not numwords:\n",
-    "        numwords['and'] = (1, 0)  # Handle 'one hundred and twenty'\n",
-    "        for idx, word in enumerate(units):\n",
-    "            numwords[word] = (1, idx)\n",
-    "        for idx, word in enumerate(tens):\n",
-    "            # Skip the '' placeholders for 0 and 10; registering them would\n",
-    "            # make an empty token count as the number ten.\n",
-    "            if word:\n",
-    "                numwords[word] = (1, idx * 10)\n",
-    "        for word, exponent in scale_exponents.items():\n",
-    "            numwords[word] = (10 ** exponent, 0)\n",
-    "\n",
-    "    # Remove hyphens so 'twenty-one' is read as two words\n",
-    "    textnum = textnum.replace('-', ' ')\n",
-    "\n",
-    "    current = result = 0  # group being built / total of closed groups\n",
-    "    curstring = ''\n",
-    "    onnumber = False\n",
-    "    lastunit = False\n",
-    "    lastscale = False\n",
-    "\n",
-    "    def is_numword(x):\n",
-    "        return is_number(x) or x in numwords\n",
-    "\n",
-    "    def from_numword(x):\n",
-    "        if is_number(x):\n",
-    "            # NOTE(review): scale 0 discards any pending `current`\n",
-    "            # ('twenty 5' keeps only 5) - confirm this is intended.\n",
-    "            return 0, int(x.replace(',', ''))\n",
-    "        return numwords[x]\n",
-    "\n",
-    "    for word in textnum.split():\n",
-    "        if word in ordinal_words:\n",
-    "            # Irregular ordinals act as a plain increment on the current group.\n",
-    "            current += ordinal_words[word]\n",
-    "            onnumber = True\n",
-    "            lastunit = False\n",
-    "            lastscale = False\n",
-    "            continue\n",
-    "\n",
-    "        # Strip a regular ordinal suffix ('fourth' -> 'four', 'twentieth' ->\n",
-    "        # 'twenty'), but keep the original spelling when the stem is not a\n",
-    "        # number word so ordinary text such as 'month' is not mangled.\n",
-    "        stem = word\n",
-    "        for ending, replacement in ordinal_endings:\n",
-    "            if stem.endswith(ending):\n",
-    "                stem = stem[:-len(ending)] + replacement\n",
-    "        if stem and is_numword(stem):\n",
-    "            word = stem\n",
-    "\n",
-    "        if not is_numword(word) or (word == 'and' and not lastscale):\n",
-    "            # Plain text: flush any number in progress, then copy the word.\n",
-    "            if onnumber:\n",
-    "                curstring += repr(result + current) + ' '\n",
-    "            curstring += word + ' '\n",
-    "            result = current = 0\n",
-    "            onnumber = False\n",
-    "            lastunit = False\n",
-    "            lastscale = False\n",
-    "            continue\n",
-    "\n",
-    "        scale, increment = from_numword(word)\n",
-    "        onnumber = True\n",
-    "\n",
-    "        if lastunit and word not in scales:\n",
-    "            # Two units in a row ('one two') are separate numbers.\n",
-    "            curstring += repr(result + current) + ' '\n",
-    "            result = current = 0\n",
-    "\n",
-    "        if scale > 1:\n",
-    "            current = max(1, current)  # bare 'thousand' means 1000\n",
-    "\n",
-    "        current = current * scale + increment\n",
-    "\n",
-    "        # Only scales above 'hundred' close a group. Closing on 'hundred'\n",
-    "        # too would turn 'two hundred thousand' into 200 + 1000.\n",
-    "        if scale > 100:\n",
-    "            result += current\n",
-    "            current = 0\n",
-    "\n",
-    "        lastscale = word in scales\n",
-    "        lastunit = word in units\n",
-    "\n",
-    "    if onnumber:\n",
-    "        curstring += repr(result + current)\n",
-    "\n",
-    "    return curstring.strip()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "83997c73-e1b4-4863-b1df-d6de6153e80d",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}