rakib72642 commited on
Commit
d2cc651
·
1 Parent(s): 9061c83
Files changed (9) hide show
  1. README.md +11 -0
  2. api_secrets.py +2 -0
  3. get_text.ipynb +298 -0
  4. get_txt.py +18 -0
  5. lamitization.py +37 -0
  6. main.py +45 -0
  7. new.py +182 -0
  8. nlp_api.py +196 -0
  9. updated_api.py +181 -0
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BAT NLP Campaign Audio Data
2
+ Configure the ngrok auth token: ngrok config add-authtoken 2Qm8hS1zPhVXiLjEdlI4738tLzF_2QJwGJMK5oTbQD33QSVXS
3
+
4
+ ngrok http --domain=batnlp.ngrok.app 1111
5
+
6
+ --------------------------------------------------------------------------------------------------------------------------------
7
+
8
+ # Old App
9
+ Configure the ngrok auth token: ngrok config add-authtoken 2Qm8hS1zPhVXiLjEdlI4738tLzF_2QJwGJMK5oTbQD33QSVXS
10
+
11
+ ngrok http --domain=hawkeyes.ngrok.app 8020
api_secrets.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
import os

# SECURITY: this key was committed in plain text (it also appears in README,
# get_txt.py and the notebook). Prefer the ASSEMBLYAI_API_KEY environment
# variable; the literal below is only a backward-compatible fallback and the
# key should be rotated.
API_KEY_ASSEMBLYAI = os.environ.get('ASSEMBLYAI_API_KEY', '5bd662961e754f148a581e0070f09c88')
# Kept as an alias because other modules import it under this name.
YOUR_API_TOKEN = API_KEY_ASSEMBLYAI
get_text.ipynb ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "ename": "ModuleNotFoundError",
10
+ "evalue": "No module named 'certifi'",
11
+ "output_type": "error",
12
+ "traceback": [
13
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
14
+ "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
15
+ "\u001b[1;32md:\\Projects\\BAT\\BAT_NLP_Campaign\\get_text.ipynb Cell 1\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mre\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mupdated_api\u001b[39;00m \u001b[39mimport\u001b[39;00m \u001b[39m*\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtyping_extensions\u001b[39;00m \u001b[39mimport\u001b[39;00m Annotated\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnltk\u001b[39;00m\n",
16
+ "File \u001b[1;32md:\\Projects\\BAT\\BAT_NLP_Campaign\\updated_api.py:9\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39muvicorn\u001b[39;00m\n\u001b[0;32m 8\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnltk\u001b[39;00m\n\u001b[1;32m----> 9\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mhttpx\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mfastapi\u001b[39;00m \u001b[39mimport\u001b[39;00m FastAPI\n\u001b[0;32m 11\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpydantic\u001b[39;00m \u001b[39mimport\u001b[39;00m BaseModel\n",
17
+ "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\__init__.py:2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m__version__\u001b[39;00m \u001b[39mimport\u001b[39;00m __description__, __title__, __version__\n\u001b[1;32m----> 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_api\u001b[39;00m \u001b[39mimport\u001b[39;00m delete, get, head, options, patch, post, put, request, stream\n\u001b[0;32m 3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_auth\u001b[39;00m \u001b[39mimport\u001b[39;00m Auth, BasicAuth, DigestAuth, NetRCAuth\n\u001b[0;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_client\u001b[39;00m \u001b[39mimport\u001b[39;00m USE_CLIENT_DEFAULT, AsyncClient, Client\n",
18
+ "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\_api.py:4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtyping\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcontextlib\u001b[39;00m \u001b[39mimport\u001b[39;00m contextmanager\n\u001b[1;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_client\u001b[39;00m \u001b[39mimport\u001b[39;00m Client\n\u001b[0;32m 5\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_config\u001b[39;00m \u001b[39mimport\u001b[39;00m DEFAULT_TIMEOUT_CONFIG\n\u001b[0;32m 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_models\u001b[39;00m \u001b[39mimport\u001b[39;00m Response\n",
19
+ "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\_client.py:11\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m__version__\u001b[39;00m \u001b[39mimport\u001b[39;00m __version__\n\u001b[0;32m 10\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_auth\u001b[39;00m \u001b[39mimport\u001b[39;00m Auth, BasicAuth, FunctionAuth\n\u001b[1;32m---> 11\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_config\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 12\u001b[0m DEFAULT_LIMITS,\n\u001b[0;32m 13\u001b[0m DEFAULT_MAX_REDIRECTS,\n\u001b[0;32m 14\u001b[0m DEFAULT_TIMEOUT_CONFIG,\n\u001b[0;32m 15\u001b[0m Limits,\n\u001b[0;32m 16\u001b[0m Proxy,\n\u001b[0;32m 17\u001b[0m Timeout,\n\u001b[0;32m 18\u001b[0m )\n\u001b[0;32m 19\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_decoders\u001b[39;00m \u001b[39mimport\u001b[39;00m SUPPORTED_DECODERS\n\u001b[0;32m 20\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_exceptions\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 21\u001b[0m InvalidURL,\n\u001b[0;32m 22\u001b[0m RemoteProtocolError,\n\u001b[0;32m 23\u001b[0m TooManyRedirects,\n\u001b[0;32m 24\u001b[0m request_context,\n\u001b[0;32m 25\u001b[0m )\n",
20
+ "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\_config.py:7\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtyping\u001b[39;00m\n\u001b[0;32m 5\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpathlib\u001b[39;00m \u001b[39mimport\u001b[39;00m Path\n\u001b[1;32m----> 7\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mcertifi\u001b[39;00m\n\u001b[0;32m 9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_compat\u001b[39;00m \u001b[39mimport\u001b[39;00m set_minimum_tls_version_1_2\n\u001b[0;32m 10\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_models\u001b[39;00m \u001b[39mimport\u001b[39;00m Headers\n",
21
+ "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'certifi'"
22
+ ]
23
+ }
24
+ ],
25
+ "source": [
26
+ "import re\n",
27
+ "from updated_api import *\n",
28
+ "from typing_extensions import Annotated\n",
29
+ "import nltk\n",
30
+ "from nltk.corpus import stopwords\n",
31
+ "from nltk.stem import WordNetLemmatizer\n",
32
+ "import string"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": null,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "nltk.download('punkt')\n",
42
+ "nltk.download('stopwords')\n",
43
+ "nltk.download('wordnet')"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 15,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "patterns = {\n",
53
+ " 'Unique Capsule': r\"\\b(((u(?:nit|niq).*?)\\s+(?:capsul))|(?:.*?uni.*?capsul))\",\n",
54
+ " 'Refreshing Taste and Smell': r\"\\b((((ref|rif|rip|rep|ep|pre).*?)\\s+t(?:a|e|i|y)s(.*?)\\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\\s+t(?:a|e|i|y)s.*?\\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))\",\n",
55
+ " 'Benson & Hadges Breeze':r\"\\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\\s+h(?:.*?)\\s+(b|p|v|f)(?:re|ee|e))\",\n",
56
+ "}\n"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 16,
62
+ "metadata": {},
63
+ "outputs": [],
64
+ "source": [
65
+ "def nlp_bat(text):\n",
66
+ " results = {}\n",
67
+ " all_match = {}\n",
68
+ " for name, pattern in patterns.items():\n",
69
+ " matches = re.findall(pattern, text, re.IGNORECASE)\n",
70
+ " m = {name:matches}\n",
71
+ " all_match.update(m)\n",
72
+ " count = len(matches)\n",
73
+ " results[name] = count\n",
74
+ " \n",
75
+ " \n",
76
+ " print(all_match) \n",
77
+ "\n",
78
+ " return results"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 17,
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "data": {
88
+ "text/plain": [
89
+ "<coroutine object detect_audio at 0x00000255D1384900>"
90
+ ]
91
+ },
92
+ "execution_count": 17,
93
+ "metadata": {},
94
+ "output_type": "execute_result"
95
+ }
96
+ ],
97
+ "source": [
98
+ "filename = input(\"Give Audio Name: \")\n",
99
+ "audio_url = upload(filename)\n",
100
+ "\n",
101
+ "detect_audio(audio_url, 'file_title')"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": 6,
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": [
110
+ "patterns = {\n",
111
+ " 'Unique Capsule': r\"unique capsul|unit capsul|uniq...capsul|uni..capsul\\b\",\n",
112
+ " 'Refreshing Taste and Smell': r\"refreshing taste smell|refreshing taste milk|refreshing test smell|ripe singh taste|repressing taste smell\\b\",\n",
113
+ " 'Benson & Hadges Breeze': r\"benson.hage.bree|benson.hage..bree|banson.hage.bree|banson.hage..bree|benson he.es breez|benson hess breez|benson he..e breez|benson haze breez|benson hezes bee|banson breez|banson hedge breathe|banson hedge bridge|benson hedge bre|benson hedge bridge| benson haze brie|banson haze breeze|banson hedge breez\\b\",\n",
114
+ "}"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 7,
120
+ "metadata": {},
121
+ "outputs": [],
122
+ "source": [
123
+ "patterns = {\n",
124
+ " 'Unique Capsule': r\"\\b(?:uni(?:que)?|unit|uniq\\.+|uni\\.+)\\s*capsul\",\n",
125
+ " 'Refreshing Taste and Smell': r\"\\b(?:refreshing|ripe|repressing)\\s+(?:taste\\s+(?:smell|milk)|test\\s+smell)\",\n",
126
+ " 'Benson & Hadges Breeze':r\"\\b(?:benson\\s+h(?:ess|aze|ezes|edge)\\s+breez|banson\\s+(?:haze\\s+breez|hedge\\s+(?:breez|bre))|benson\\s+h(?:aze\\s+brie|edge\\s+bridge))\",\n",
127
+ "}"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 8,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "patterns = {\n",
137
+ " 'Unique Capsule': r\"\\b(?:uni(?:que)?|unit|uniq\\.+|uni\\.+)\\s*capsul\",\n",
138
+ " 'Refreshing Taste and Smell': r\"\\b(?:refreshing|ripe|repressing)\\s+(?:taste\\s+(?:smell|milk)|test\\s+smell)\",\n",
139
+ " 'Benson & Hadges Breeze':r\"\\b(?:((b|p|v|f)(a|e).*?son)\\s+(h(?:.*?))\\s+(br))\",\n",
140
+ "}"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "code",
145
+ "execution_count": null,
146
+ "metadata": {},
147
+ "outputs": [],
148
+ "source": []
149
+ },
150
+ {
151
+ "cell_type": "code",
152
+ "execution_count": 9,
153
+ "metadata": {},
154
+ "outputs": [],
155
+ "source": [
156
+ "def nlp_bat(text):\n",
157
+ " results = {}\n",
158
+ " all_match = {}\n",
159
+ " for name, pattern in patterns.items():\n",
160
+ " matches = re.findall(pattern, text, re.IGNORECASE)\n",
161
+ " m = {name:matches}\n",
162
+ " all_match.update(m)\n",
163
+ " count = len(matches)\n",
164
+ " results[name] = count\n",
165
+ " \n",
166
+ " \n",
167
+ " print(all_match) \n",
168
+ "\n",
169
+ " return results"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": 10,
175
+ "metadata": {},
176
+ "outputs": [
177
+ {
178
+ "name": "stdout",
179
+ "output_type": "stream",
180
+ "text": [
181
+ "{'Unique Capsule': ['unique capsul'], 'Refreshing Taste and Smell': ['refreshing taste smell'], 'Benson & Hadges Breeze': [('banson', 'b', 'a', 'hages niyashe ekti unique capsule offer panson hages', 'br'), ('panson', 'p', 'a', 'hages', 'br')]}\n"
182
+ ]
183
+ },
184
+ {
185
+ "data": {
186
+ "text/plain": [
187
+ "{'Unique Capsule': 1,\n",
188
+ " 'Refreshing Taste and Smell': 1,\n",
189
+ " 'Benson & Hadges Breeze': 2}"
190
+ ]
191
+ },
192
+ "execution_count": 10,
193
+ "metadata": {},
194
+ "output_type": "execute_result"
195
+ }
196
+ ],
197
+ "source": [
198
+ "text = \"Clean text : apnea janet kushihaban banson hages niyashe ekti unique capsule offer panson hages bridge panson hages breeze air capsule atom agnoton tharna refreshing taste smell darn offer tea trial cora jonu apnea ekti trial kit nitaparin thunobat\"\n",
199
+ "\n",
200
+ "nlp_bat(text)\n"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 11,
206
+ "metadata": {},
207
+ "outputs": [],
208
+ "source": [
209
+ "old_patterns = {\n",
210
+ " 'Unique Capsule': r\"\\b(?:uni(?:que)?|unit|uniq\\.+|uni\\.+)\\s*capsul\",\n",
211
+ " 'Refreshing Taste and Smell': r\"\\b(?:refreshing|ripe|repressing)\\s+(?:taste\\s+(?:smell|milk)|test\\s+smell)\",\n",
212
+ " 'Benson & Hadges Breeze': r\"\\b(?:((b|p|v|f)(a|e).*?son)\\s+(h(?:.*?))\\s+(br))\",\n",
213
+ "}"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": 14,
219
+ "metadata": {},
220
+ "outputs": [],
221
+ "source": [
222
+ "newPattern = {\n",
223
+ " 'Unique Capsule': r\"\\b(((u(?:nit|niq).*?)\\s+(?:capsul))|(?:.*?uni.*?capsul))\",\n",
224
+ " 'Refreshing Taste and Smell': r\"\\b((((ref|rif|rip|rep|ep|pre).*?)\\s+t(?:a|e|i|y)s(.*?)\\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\\s+t(?:a|e|i|y)s.*?\\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))\",\n",
225
+ " 'Benson & Hadges Breeze':r\"\\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\\s+h(?:.*?)\\s+(b|p|v|f)(?:re|ee|e|ri))\",\n",
226
+ "}"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "metadata": {},
233
+ "outputs": [],
234
+ "source": [
235
+ "!pip install assemblyai"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": 9,
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "name": "stdout",
245
+ "output_type": "stream",
246
+ "text": [
247
+ "<assemblyai.transcriber.Transcript object at 0x0000029377EFD480>\n"
248
+ ]
249
+ }
250
+ ],
251
+ "source": [
252
+ "import assemblyai as aai\n",
253
+ "from updated_api import *\n",
254
+ "\n",
255
+ "# Replace with your API key\n",
256
+ "aai.settings.api_key = \"5bd662961e754f148a581e0070f09c88\"\n",
257
+ "\n",
258
+ "# URL of the file to transcribe\n",
259
+ "FILE_URL = \"https://form.hedigital.online/file-1702199439520-529630625.mp4\"\n",
260
+ "\n",
261
+ "# You can also transcribe a local file by passing in a file path\n",
262
+ "# FILE_URL = './path/to/file.mp3'\n",
263
+ "\n",
264
+ "transcriber = aai.Transcriber()\n",
265
+ "transcript = transcriber.transcribe(FILE_URL)\n",
266
+ "print(transcript)\n"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": null,
272
+ "metadata": {},
273
+ "outputs": [],
274
+ "source": []
275
+ }
276
+ ],
277
+ "metadata": {
278
+ "kernelspec": {
279
+ "display_name": "nlpBat",
280
+ "language": "python",
281
+ "name": "python3"
282
+ },
283
+ "language_info": {
284
+ "codemirror_mode": {
285
+ "name": "ipython",
286
+ "version": 3
287
+ },
288
+ "file_extension": ".py",
289
+ "mimetype": "text/x-python",
290
+ "name": "python",
291
+ "nbconvert_exporter": "python",
292
+ "pygments_lexer": "ipython3",
293
+ "version": "3.10.13"
294
+ }
295
+ },
296
+ "nbformat": 4,
297
+ "nbformat_minor": 2
298
+ }
get_txt.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import assemblyai as aai

from api_secrets import API_KEY_ASSEMBLYAI

# Load the key from the shared secrets module instead of hard-coding it here,
# matching how nlp_api.py and new.py obtain it.
aai.settings.api_key = API_KEY_ASSEMBLYAI

transcriber = aai.Transcriber()

# Remote audio file to transcribe.
audio_url = (
    "https://bat.hedigital.online/file-1703669708657-351786808.mpeg"
)

# Enable speaker diarization so utterances carry speaker labels.
config = aai.TranscriptionConfig(speaker_labels=True)

transcript = transcriber.transcribe(audio_url, config)

print(transcript.text)

# Print each diarized utterance with its speaker label.
for utterance in transcript.utterances:
    print(f"Speaker {utterance.speaker}: {utterance.text}")
lamitization.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.corpus import stopwords
3
+ from nltk.stem import WordNetLemmatizer
4
+ import string
5
+
6
+ nltk.download('punkt')
7
+ nltk.download('stopwords')
8
+ nltk.download('wordnet')
9
+
10
def lemmatize_and_clean(text):
    """Normalize *text*: tokenize, keep lowercase alphabetic words, drop
    English stopwords, lemmatize each word, and rejoin into one string."""
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    kept = []
    for token in nltk.word_tokenize(text):
        # Skip punctuation/numbers; lowercase what remains.
        if not token.isalpha():
            continue
        word = token.lower()
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))

    return ' '.join(kept)
29
+
30
# Example usage: clean one sample transcription and show before/after.
sample_text = "kushir cover. kushir cover benson and hezes nih unique capsule of our janum benson and hesses breeze aprajanara kushiha benjay a capsule roche egg thorne refreshing taste and smell arapnajudiya trial kotachan tahal ajinita parnakti trial kit donnabat."
sample_cleaned = lemmatize_and_clean(sample_text)

print("Original Text:")
print(sample_text)
print("\nCleaned Text:")
print(sample_cleaned)
main.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from nlp_api import *
3
+ from typing_extensions import Annotated
4
+ import string
5
+
6
# Fuzzy regex per campaign phrase, tolerating common mis-transcriptions.
patterns = {
    'Unique Capsule': r"unique capsul|unit capsul|uniq...capsul|uni..capsul\b",
    'Refreshing Taste and Smell': r"refreshing taste smell|refreshing taste milk|refreshing test smell|ripe singh taste|repressing taste smell\b",
    'Benson & Hadges Breeze': r"benson he.es breez|benson hess breez|benson he..e breez|benson haze breez|benson hezes bee|banson breez|banson hedge breathe|banson hedge bridge|benson hedge bre|benson hedge bridge| benson haze brie|banson haze breeze|banson hedge breez\b"
}


def nlp_bat(text):
    """Count case-insensitive matches of every campaign pattern in *text*.

    Prints the raw matches per pattern for inspection, then returns a
    mapping of pattern name -> number of matches.
    """
    all_match = {name: re.findall(pattern, text, re.IGNORECASE)
                 for name, pattern in patterns.items()}
    print(all_match)
    return {name: len(found) for name, found in all_match.items()}
28
+
29
+
30
# Script entry: upload a local audio file to AssemblyAI, transcribe it, and
# run the campaign-phrase detector on the cleaned transcript.
filename = input("Give Audio Name: ")
audio_url = upload(filename)

# detect_audio transcribes, lemmatizes, and returns {pattern_name: count}.
# The original discarded this return value (the commented-out prints showed
# the intent); print it so the run actually reports its result.
result = detect_audio(audio_url, 'file_title')
print(result)
new.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import re
4
+ from typing import List, Union
5
+
6
+ import aiofiles
7
+ import uvicorn
8
+ import nltk
9
+ import httpx
10
+ from fastapi import FastAPI
11
+ from pydantic import BaseModel
12
+ from nltk.corpus import stopwords
13
+ from nltk.stem import WordNetLemmatizer
14
+ import logging
15
+ import pytz
16
+ from datetime import datetime
17
+ from api_secrets import API_KEY_ASSEMBLYAI
18
+
19
+
20
+ # logging.basicConfig(filename0="BAT_NLP_Campaign.log",
21
+ # filemode='w')
22
+ # logger = logging.getLogger("BAT")
23
+ # logger.setLevel(logging.DEBUG)
24
+ # file_handler = logging.FileHandler("BAT_NLP_Campaign.log")
25
+ # logger.addHandler(file_handler)
26
+ # total_done = 0
27
+ # total_error = 0
28
+
29
+
30
+
31
def get_bd_time():
    """Return the current Bangladesh (Asia/Dhaka) time as 'HH:MM:SS AM/PM'."""
    # zoneinfo is stdlib (Python 3.9+); replaces the third-party pytz
    # dependency with identical output.
    from zoneinfo import ZoneInfo

    time_now = datetime.now(ZoneInfo("Asia/Dhaka"))
    return time_now.strftime("%I:%M:%S %p")
36
+
37
+ app = FastAPI()
38
+
39
+ CHUNK_SIZE = 5_242_880 # 5MB
40
+
41
+ upload_endpoint = 'https://api.assemblyai.com/v2/upload'
42
+ transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'
43
+
44
+ headers_auth_only = {'authorization': API_KEY_ASSEMBLYAI}
45
+
46
+ headers = {
47
+ "authorization": API_KEY_ASSEMBLYAI,
48
+ "content-type": "application/json"
49
+ }
50
+
51
+
52
+ class Item(BaseModel):
53
+ url: str
54
+
55
+
56
async def lemmatize_and_clean(text):
    """Lowercase, tokenize, drop non-alpha/stopword tokens, and lemmatize.

    Returns the cleaned tokens joined by single spaces.
    """
    # Build the stopword set once; the original rebuilt it for EVERY token
    # inside the comprehension (O(n) set construction per word).
    stop_words = set(stopwords.words('english'))
    words = nltk.word_tokenize(text.lower())
    words = [word for word in words if word.isalpha() and word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    # Lemmatization is CPU-bound; run it in a worker thread so the event
    # loop stays responsive.
    words = [await asyncio.to_thread(lemmatizer.lemmatize, word) for word in words]
    return ' '.join(words)
62
+
63
+
64
+ patterns = {
65
+ 'Unique Capsule': r"\b(((u(?:nit|niq).*?)\s+(?:capsul))|(?:.*?uni.*?capsul))",
66
+ 'Refreshing Taste and Smell': r"\b((((ref|rif|rip|rep|ep|pre).*?)\s+t(?:a|e|i|y)s(.*?)\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\s+t(?:a|e|i|y)s.*?\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))",
67
+ 'Benson & Hadges Breeze':r"\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\s+h(?:.*?)\s+(b|p|v|f)(?:re|ee|e|ri))",
68
+ }
69
+
70
+
71
+ async def nlp_bat(text):
72
+ results = {}
73
+ all_match = {}
74
+ for name, pattern in patterns.items():
75
+ matches = re.findall(pattern, text, re.IGNORECASE)
76
+ all_match[name] = matches
77
+ results[name] = len(matches)
78
+
79
+ print(all_match)
80
+ return results
81
+
82
+
83
async def read_file(filename):
    """Asynchronously yield *filename*'s bytes in CHUNK_SIZE-sized chunks."""
    async with aiofiles.open(filename, 'rb') as handle:
        # read() returns b'' at EOF, which ends the loop.
        while chunk := await handle.read(CHUNK_SIZE):
            yield chunk
90
+
91
+
92
async def upload(filename):
    """Upload *filename* to AssemblyAI and return the hosted upload URL.

    Streams the whole file as the body of ONE request. The previous version
    POSTed each chunk as a separate upload request inside the loop and
    returned only the last response's URL, so any file larger than one
    chunk was silently truncated to its final chunk.
    """
    async with httpx.AsyncClient() as client:
        # httpx accepts an async byte iterator as a streaming request body.
        upload_response = await client.post(
            upload_endpoint,
            headers=headers_auth_only,
            content=read_file(filename),
        )
    return upload_response.json()['upload_url']
97
+
98
+
99
async def transcribe(audio_url):
    """Submit *audio_url* for transcription and return the transcript id."""
    payload = {'audio_url': audio_url}
    async with httpx.AsyncClient() as client:
        response = await client.post(transcript_endpoint, json=payload, headers=headers)
    return response.json()['id']
104
+
105
+
106
async def poll(transcript_id):
    """Fetch the current status payload for *transcript_id*."""
    async with httpx.AsyncClient() as client:
        response = await client.get(f'{transcript_endpoint}/{transcript_id}', headers=headers)
    return response.json()
111
+
112
+
113
async def get_transcription_result_url(url):
    """Start transcription of *url* and poll every 2s until it resolves.

    Returns (data, None) on success or (data, error_message) on failure.
    """
    transcribe_id = await transcribe(url)
    while True:
        data = await poll(transcribe_id)
        status = data['status']
        if status == 'completed':
            return data, None
        if status == 'error':
            return data, data['error']
        print("Processing Audio")
        await asyncio.sleep(2)
123
+
124
+
125
async def detect_audio(url, title):
    """Transcribe *url*, clean the transcript, and return campaign match counts.

    *title* is accepted for interface compatibility but is not used here.
    """
    data, _error = await get_transcription_result_url(url)
    raw_text = data['text']
    print("main text : ", raw_text)
    cleaned = await lemmatize_and_clean(raw_text)
    print("Clean text : ", cleaned)
    return await nlp_bat(cleaned.lower())
134
+
135
+
136
async def process_item(item: Item):
    """Run audio detection for one request item and return its result dict."""
    print(item.url)
    # Removed: a try/finally whose finally was `pass` (no-op), and a
    # json.dumps/json.loads round-trip — detect_audio already returns a
    # plain JSON-serializable {str: int} dict, so the round-trip was identity.
    return await detect_audio(item.url, title="file")
144
+
145
+
146
async def process_items(items: Union[Item, List[Item]]):
    """Process one Item or a list of Items; lists run concurrently.

    For a list, the per-item result dicts are merged into a single dict.
    """
    if not isinstance(items, list):
        return await process_item(items)

    partial_results = await asyncio.gather(*(process_item(item) for item in items))
    merged = {}
    # NOTE(review): every result dict uses the same pattern-name keys, so
    # later items overwrite earlier ones in this merge — confirm intended.
    for partial in partial_results:
        merged.update(partial)
    return merged
156
+
157
+
158
@app.post("/nlp")
async def create_items(items: Union[Item, List[Item]]):
    """POST /nlp: run campaign detection for one Item or a list of Items.

    On failure, returns the error message to the caller instead of a bare 500.
    """
    # Dead commented-out logging code and the no-op finally: pass removed.
    try:
        results = await process_items(items)
        print("Result Sent to User:", results)
        return results
    except Exception as e:
        return {"AI": f"Error: {str(e)}"}
175
+
176
+
177
+
178
if __name__ == "__main__":
    # The try/finally with a bare `pass` was a no-op wrapper; uvicorn.run
    # blocks until shutdown on its own.
    uvicorn.run(app, host="127.0.0.1", port=8020)
nlp_api.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # files after part 2
2
+ import requests
3
+ import time
4
+ from api_secrets import API_KEY_ASSEMBLYAI
5
+ import re
6
+ from fastapi import FastAPI
7
+ from pydantic import BaseModel
8
+ import asyncio
9
+ from typing import List, Union
10
+ import uvicorn
11
+ import json
12
+ import nltk
13
+ from nltk.corpus import stopwords
14
+ from nltk.stem import WordNetLemmatizer
15
+ import string
16
+
17
+ # nltk.download('punkt')
18
+ # nltk.download('stopwords')
19
+ # nltk.download('wordnet')
20
+
21
+
22
+
23
+ app = FastAPI()
24
+
25
+ class Item(BaseModel):
26
+ url: str
27
+
28
+ upload_endpoint = 'https://api.assemblyai.com/v2/upload'
29
+ transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'
30
+
31
+ headers_auth_only = {'authorization': API_KEY_ASSEMBLYAI}
32
+
33
+ headers = {
34
+ "authorization": API_KEY_ASSEMBLYAI,
35
+ "content-type": "application/json"
36
+ }
37
+
38
+ CHUNK_SIZE = 5_242_880 # 5MB
39
+
40
def lemmatize_and_clean(text):
    """Tokenize *text*, keep lowercase alphabetic non-stopword tokens,
    lemmatize them, and return the cleaned tokens joined by spaces."""
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Lowercase only the alphabetic tokens (punctuation/numbers dropped).
    tokens = [t.lower() for t in nltk.word_tokenize(text) if t.isalpha()]
    return ' '.join(
        lemmatizer.lemmatize(t) for t in tokens if t not in stop_words
    )
59
+
60
+ # Patterns
61
+ # patterns = {
62
+ # 'smoker': r"sm.k.r|s.m.k.r\b",
63
+ # 'dhumpai': r"d.m.a.|d..mp..|.om.a.|umpa.\b",
64
+ # 'alchemy': r"al.k.m|.lch.m.\b",
65
+ # 'benson': r"..ns.n\b",
66
+ # 'goldleaf': r"go.lb|gol..lea.|g.l...|g.l../b",
67
+ # 'dunhil': r"d.n.h.l|d.nh.l|.an.i.l|.an.i.l\b",
68
+ # 'smooth': r".m..th|sm.d\b",
69
+ # 'thanda_flvr': r"th.nd..fl.v|t.nd...fl.v|th.nd...fl.v|t.nd..fl.v|..de.fl.v|.and..fl.v|..anda.fl..\b",
70
+ # 'best_tobacco': r".est.t.b..|.est..a.o|.est.o.a.o|.est.o.\b"
71
+ # }
72
+ # patterns = {
73
+ # 'Unique Capsule': r"unique capsul|unit capsul|uniq...capsul|uni..capsul\b",
74
+ # 'Refreshing Taste and Smell': r"refreshing taste smell|refreshing taste milk\b",
75
+ # 'Benson & Hadges Breeze': r"benson he.es breez|benson hess breez|benson he..e breez|benson haze breez|benson hezes bee|banson breez|banson hedge breathe|banson hedge bridge|benson hedge bre|benson hedge bridge\b"
76
+ # }
77
+
78
+ # patterns = {
79
+ # 'Unique Capsule': r"unique capsul|unit capsul|uniq...capsul|uni..capsul\b",
80
+ # 'Refreshing Taste and Smell': r"refreshing taste smell|refreshing taste milk|refreshing test smell|ripe singh taste|repressing taste smell\b",
81
+ # 'Benson & Hadges Breeze': r"benson.hage.bree|benson.hage..bree|banson.hage.bree|banson.hage..bree|benson he.es breez|benson hess breez|benson he..e breez|benson haze breez|benson hezes bee|banson breez|banson hedge breathe|banson hedge bridge|benson hedge bre|benson hedge bridge| benson haze brie|banson haze breeze|banson hedge breez\b"
82
+ # }
83
# Active detection patterns — tightened rewrites of the commented drafts above.
patterns = {
    'Unique Capsule': r'\b(?:uni(?:que)?|unit|uniq\.+|uni\.+)\s*capsul',
    'Refreshing Taste and Smell': r'\b(?:refreshing|ripe|repressing)\s+(?:taste\s+(?:smell|milk)|test\s+smell)\b',
    'Benson & Hadges Breeze': r'\b(?:benson\s+h(?:ess|aze|ezes|edge)\s+breez|banson\s+(?:haze\s+breez|hedge\s+(?:breez|bre))|benson\s+h(?:aze\s+brie|edge\s+bridge))\b',
}


def nlp_bat(text):
    """Count case-insensitive matches of each campaign pattern in *text*.

    Prints the raw matches per pattern for inspection, then returns a
    mapping of pattern name -> match count.
    """
    all_match = {name: re.findall(pattern, text, re.IGNORECASE)
                 for name, pattern in patterns.items()}
    print(all_match)
    return {name: len(found) for name, found in all_match.items()}
103
+
104
+
105
+
106
+
107
+
108
+
109
def upload(filename):
    """Stream the local file *filename* to AssemblyAI; return its upload URL."""

    def chunks(path):
        # Generator body: requests streams it as a chunked request body.
        with open(path, 'rb') as f:
            while data := f.read(CHUNK_SIZE):
                yield data

    response = requests.post(upload_endpoint, headers=headers_auth_only, data=chunks(filename))
    return response.json()['upload_url']
120
+
121
+
122
def transcribe(audio_url):
    """Request transcription of *audio_url* and return the transcript id."""
    response = requests.post(
        transcript_endpoint,
        json={'audio_url': audio_url},
        headers=headers,
    )
    return response.json()['id']
129
+
130
+
131
def poll(transcript_id):
    """Fetch the current transcription status payload for *transcript_id*."""
    response = requests.get(f'{transcript_endpoint}/{transcript_id}', headers=headers)
    return response.json()
135
+
136
+
137
def get_transcription_result_url(url):
    """Kick off transcription of *url* and block, polling every 2s.

    Returns (data, None) on success or (data, error_message) on failure.
    """
    transcribe_id = transcribe(url)
    while True:
        data = poll(transcribe_id)
        status = data['status']
        if status == 'completed':
            return data, None
        if status == 'error':
            return data, data['error']
        print("Processing Audio")
        time.sleep(2)
148
+
149
+
150
def detect_audio(url, title):
    """Transcribe *url*, clean the transcript, and return campaign match counts.

    *title* is accepted for interface compatibility but is not used here.
    """
    data, _error = get_transcription_result_url(url)
    cleaned = lemmatize_and_clean(data['text'])
    print(cleaned)
    return nlp_bat(cleaned.lower())
160
+
161
+
162
async def process_item(item: Item):
    """Run detection for a single request item and return its result dict.

    NOTE(review): detect_audio is synchronous and blocking, so concurrent
    requests serialize on it — confirm whether that is acceptable here.
    """
    print(item.url)
    # Removed: no-op try/finally-pass and the identity json.dumps/loads
    # round-trip on an already JSON-serializable {str: int} dict.
    return detect_audio(item.url, title="file")
171
+
172
async def process_items(items: Union[Item, List[Item]]):
    """Process one Item or a list of Items; lists are gathered together.

    For a list, the per-item result dicts are merged into a single dict.
    """
    if not isinstance(items, list):
        return await process_item(items)

    partial_results = await asyncio.gather(*(process_item(item) for item in items))
    merged = {}
    for partial in partial_results:
        merged.update(partial)
    return merged
182
+
183
@app.post("/nlp")
async def create_items(items: Union[Item, List[Item]]):
    """POST /nlp: run campaign detection for one Item or a list of Items."""
    # The try/finally with a bare `pass` was a no-op and has been removed;
    # unlike new.py, this variant has no except handler, so errors surface
    # as FastAPI's default 500 response.
    results = await process_items(items)
    print("Result Sent to User:", results)
    return results
191
+
192
if __name__ == "__main__":
    # The try/finally with a bare `pass` was a no-op wrapper; uvicorn.run
    # blocks until shutdown on its own.
    uvicorn.run(app, host="127.0.0.1", port=8020)
updated_api.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import re
4
+ from typing import List, Union
5
+
6
+ import aiofiles
7
+ import uvicorn
8
+ import nltk
9
+ import httpx
10
+ from fastapi import FastAPI
11
+ from pydantic import BaseModel
12
+ from nltk.corpus import stopwords
13
+ from nltk.stem import WordNetLemmatizer
14
+ import logging
15
+ import pytz
16
+ from datetime import datetime
17
+ from api_secrets import API_KEY_ASSEMBLYAI
18
+
19
+
20
+ # logging.basicConfig(filename0="BAT_NLP_Campaign.log",
21
+ # filemode='w')
22
+ # logger = logging.getLogger("BAT")
23
+ # logger.setLevel(logging.DEBUG)
24
+ # file_handler = logging.FileHandler("BAT_NLP_Campaign.log")
25
+ # logger.addHandler(file_handler)
26
+ # total_done = 0
27
+ # total_error = 0
28
+
29
+
30
+
31
def get_bd_time():
    """Return the current Bangladesh (Asia/Dhaka) wall-clock time.

    Returns a 12-hour formatted string such as "07:45:12 PM".
    Uses the stdlib ``zoneinfo`` module (available since 3.9, which this
    file already requires via ``asyncio.to_thread``) instead of the
    third-party ``pytz`` package.
    """
    from zoneinfo import ZoneInfo  # stdlib replacement for pytz
    now = datetime.now(ZoneInfo("Asia/Dhaka"))
    return now.strftime("%I:%M:%S %p")
36
+
37
app = FastAPI()

CHUNK_SIZE = 5_242_880  # 5 MB per read when streaming file uploads

# AssemblyAI REST endpoints.
upload_endpoint = 'https://api.assemblyai.com/v2/upload'
transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'

# Auth-only header for the binary upload endpoint (no JSON content type).
headers_auth_only = {'authorization': API_KEY_ASSEMBLYAI}

# Full headers for the JSON transcript endpoints.
headers = {
    "authorization": API_KEY_ASSEMBLYAI,
    "content-type": "application/json"
}
50
+
51
+
52
class Item(BaseModel):
    """Request body for the /nlp endpoint."""
    url: str  # publicly reachable audio URL handed to AssemblyAI
54
+
55
+
56
async def lemmatize_and_clean(text):
    """Lowercase, tokenise, drop stop-words/non-alpha tokens, lemmatise.

    Returns the cleaned lemmas joined by single spaces.
    The stop-word set is built once (the original rebuilt it for every
    token, O(n*m)), and lemmatisation runs as one batch in a single
    worker thread instead of one ``to_thread`` hop per word.
    """
    tokens = nltk.word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))  # hoisted: was rebuilt per token
    tokens = [t for t in tokens if t.isalpha() and t not in stop_words]
    lemmatizer = WordNetLemmatizer()
    lemmas = await asyncio.to_thread(
        lambda: [lemmatizer.lemmatize(t) for t in tokens]
    )
    return ' '.join(lemmas)
62
+
63
+
64
# Fuzzy regexes matching BAT campaign phrases in noisy speech-to-text output.
# Each alternation tolerates common mis-transcriptions (e.g. "taste" heard as
# "tast/tist/tyst", "Benson" as "Penson/Vensen").
# NOTE(review): these are applied to lemmatised, lower-cased text with
# re.IGNORECASE; the exact tolerance sets should be verified against sample
# transcripts before changing anything.
patterns = {
    'Unique Capsule': r"\b(((u(?:nit|niq).*?)\s+(?:capsul))|(?:.*?uni.*?capsul))",
    'Refreshing Taste and Smell': r"\b((((ref|rif|rip|rep|ep|pre).*?)\s+t(?:a|e|i|y)s(.*?)\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\s+t(?:a|e|i|y)s.*?\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))",
    'Benson & Hadges Breeze':r"\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\s+h(?:.*?)\s+(b|p|v|f)(?:re|ee|e|ri))",
}
69
+
70
+
71
async def nlp_bat(text):
    """Count occurrences of each campaign pattern in *text*.

    Returns {pattern_name: match_count}.  The raw matches are printed
    for debugging before counting.
    """
    raw = {
        name: re.findall(rx, text, re.IGNORECASE)
        for name, rx in patterns.items()
    }
    print(raw)
    return {name: len(found) for name, found in raw.items()}
81
+
82
+
83
async def read_file(filename):
    """Async generator yielding *filename* in CHUNK_SIZE-byte chunks."""
    async with aiofiles.open(filename, 'rb') as handle:
        while True:
            chunk = await handle.read(CHUNK_SIZE)
            if not chunk:
                return
            yield chunk
90
+
91
+
92
async def upload(filename):
    """Upload a local audio file to AssemblyAI and return its upload URL.

    Streams the whole file as a single chunked request body.  The previous
    implementation issued one POST per 5 MB chunk, which created a separate
    (truncated) upload for every chunk and returned only the URL of the
    final chunk — corrupting any file larger than CHUNK_SIZE.
    """
    async with httpx.AsyncClient() as client:
        response = await client.post(
            upload_endpoint,
            headers=headers_auth_only,
            # httpx accepts an async byte iterator and sends it as a
            # chunked request body.
            content=read_file(filename),
        )
        return response.json()['upload_url']
97
+
98
+
99
async def transcribe(audio_url):
    """Create an AssemblyAI transcription job for *audio_url*.

    Returns the new job's id.
    """
    payload = {'audio_url': audio_url}
    async with httpx.AsyncClient() as client:
        response = await client.post(transcript_endpoint, json=payload, headers=headers)
        return response.json()['id']
104
+
105
+
106
async def poll(transcript_id):
    """Fetch the current state of transcription job *transcript_id*."""
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f'{transcript_endpoint}/{transcript_id}', headers=headers
        )
        return response.json()
111
+
112
+
113
async def get_transcription_result_url(url):
    """Start a transcription for *url* and poll until it settles.

    Returns (data, None) on success or (data, error_message) on failure.
    Sleeps 2 seconds between polls without blocking the event loop.
    """
    job_id = await transcribe(url)
    while True:
        data = await poll(job_id)
        status = data['status']
        if status == 'completed':
            return data, None
        if status == 'error':
            return data, data['error']
        print("Processing Audio")
        await asyncio.sleep(2)
123
+
124
+
125
async def detect_audio(url, title):
    """Transcribe the audio at *url*, clean the text and run the
    campaign keyword matcher.  *title* is accepted but currently unused.
    """
    data, _error = await get_transcription_result_url(url)
    raw_text = data['text']
    print("main text : ", raw_text)
    cleaned = await lemmatize_and_clean(raw_text)
    print("Clean text : ", cleaned)
    return await nlp_bat(cleaned.lower())
134
+
135
+
136
async def process_item(item: Item):
    """Run keyword detection for one payload.

    The result is round-tripped through JSON to guarantee it is
    serialisable before being returned to the caller.
    """
    print(item.url)
    detection = await detect_audio(item.url, title="file")
    return json.loads(json.dumps(detection))
144
+
145
+
146
async def process_items(items: Union[Item, List[Item]]):
    """Process one Item or a batch of Items concurrently.

    Batch results are merged into a single dict; later items overwrite
    duplicate keys.
    """
    if not isinstance(items, list):
        return await process_item(items)
    partials = await asyncio.gather(*(process_item(i) for i in items))
    merged = {}
    for part in partials:
        merged.update(part)
    return merged
156
+
157
+
158
@app.post("/nlp")
async def create_items(items: Union[Item, List[Item]]):
    """POST /nlp: accept one Item or a batch.

    Never propagates an exception to the client: failures are reported
    as {"AI": "Error: <message>"}.
    """
    try:
        results = await process_items(items)
        print("Result Sent to User:", results)
        return results
    except Exception as exc:
        return {"AI": f"Error: {str(exc)}"}
175
+
176
+
177
if __name__ == "__main__":
    # Local entry point; ngrok exposes this port externally (see README).
    uvicorn.run(app, host="127.0.0.1", port=1111)