CORVO-AI committed on
Commit
485895c
·
verified ·
1 Parent(s): f129942

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -85
app.py CHANGED
@@ -5,13 +5,13 @@ import string
5
  import time
6
 
7
  app = Flask(__name__)
 
8
  # Global variables to store workspace and bot IDs
9
  GLOBAL_WORKSPACE_ID = None
10
  GLOBAL_BOT_ID = None
11
 
12
- # cookie value
13
- AUTH_COOKIE = "pscd=try.botpress.com; _hjSessionUser_2931810=eyJpZCI6ImQ2MGMzYjhkLTlkMjQtNTA0OS1hMzlmLWEzNmI0NzA0NzUxNCIsImNyZWF0ZWQiOjE3MzU3MTg0MDcwNTAsImV4aXN0aW5nIjp0cnVlfQ==; hubspotutk=75739411a4d011b2164c4f3d944ecb94; intercom-device-id-bjzkw2xf=afd0a36b-b229-44e3-828e-60483c80c10c; _hjSessionUser_3339867=eyJpZCI6IjU4ODlmMTY4LWRkNGEtNTJhZS1hZTUzLWZlYWQwM2ZmMTVjNyIsImNyZWF0ZWQiOjE3MzU3MTg1ODM4MDgsImV4aXN0aW5nIjp0cnVlfQ==; __hstc=59821234.75739411a4d011b2164c4f3d944ecb94.1735718442141.1746194848836.1746538539919.88; mp_1195923e954ce61d822842b5832047cd_mixpanel=%7B%22distinct_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%2C%22%24device_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%2C%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fapp.botpress.cloud%2F%22%2C%22%24initial_referring_domain%22%3A%20%22app.botpress.cloud%22%2C%22__mps%22%3A%20%7B%7D%2C%22__mpso%22%3A%20%7B%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fapp.botpress.cloud%2F%22%2C%22%24initial_referring_domain%22%3A%20%22app.botpress.cloud%22%7D%2C%22__mpus%22%3A%20%7B%7D%2C%22__mpa%22%3A%20%7B%7D%2C%22__mpu%22%3A%20%7B%7D%2C%22__mpr%22%3A%20%5B%5D%2C%22__mpap%22%3A%20%5B%5D%2C%22%24user_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%7D; intercom-session-bjzkw2xf=TTdnZGNWUC9xNXMreE80NXhRZFNZS0pyUEdsbkJRc2JMcXdGZmcveVRPYkxZTmVnVnhqMUhJTWlDcEpVcWljeDZVYVVSblN4YnV5S0xBdWxDd2swQjZiaUZTeWl5M1psRmtoUWJwUU9FSFE9LS1Bay9zNldJTmVhUFdwMFNReFRmcXB3PT0=--68abc5394d7aab99748f3e451637cab5d9152a4c; _ga=GA1.2.1726154447.1735718383; _gid=GA1.2.1619749406.1746821934; _gat_UA-226900660-1=1; _gat_UA-226900660-2=1; _ga_W6YT9YSNLH=GS2.2.s1746821948$o89$g0$t1746821948$j0$l0$h0; _ga_CYSS87Q508=GS2.2.s1746821949$o89$g0$t1746821949$j0$l0$h0; _hjSession_2931810=eyJpZCI6IjA1YTlkMzY3LWNiZWEtNGQ3OC04YzNiLTEzNTFjNjkxYzViZSIsImMiOjE3NDY4MjE5NTU2NjYsInMiOjAsInIiOjAsInNiIjowLCJzciI6MCwic2UiOjAsImZzIjowLCJzcCI6MH0=; _ga_PCC6TBWJY6=GS2.1.s1746821926$o120$g1$t1746821960$j0$l0$h0; 
_ga_HKHSWES9V9=GS2.1.s1746821929$o120$g1$t1746821960$j29$l0$h193244206; csrf_token_bd9ac21c34b9f0915e733c3e5305d737d0722c1168be7376b889426b5ec2a298=pIDxu4npODUGEpo7JHVQrKZ4GFaa3U+3BpgxaV5hcVw=; ory_kratos_session=MTc0NjgyMjAyOHxfUmdMaVlTQXVfSmxlT1lJSGpyU2FhbjVUTHg0R0ZsQWgtVm00M3pHcXZwVG9yNW1qRDJheUFGaFZvNmFEUVdBOThQR014RjJJbmhUMmhIV1I1ME5UVHZkTDNpMUMtQlRjZ1ZTbE55M19Pb2dHTF9vQlJoSGlBQnRRWUp0M1ZUdnVvcENLeVhOTllWNk1zMk11bFVPOWFrTzJMTTdxMmVteUozVVRDMWE5TVIxbDgzU3dUY2VQaDBRWDN4bDJUVm8yUkZQa19sb09GbzlFZHF2MDFQcVR6bVVWVVpDLXVoQ1lXMEh2LV9Sd2VNZXM1cjM4TGZPVTJqdW5xNTBETTBDYkppU0xNU2xicUk3Z2EyMnFkVmdyQT09fEYiv7pXcfXVnpIFi4JLGgDObQAchyJCoAwGDSkFkoX7; ajs_user_id=d403ad7b-ea73-4d29-b977-5fd95afd585c; ajs_anonymous_id=cda6139d-cb82-4906-bfac-adaea115b097"
14
-
15
 
16
  # -------------------------------------------------------------------
17
  # Helper functions for random bot/workspace names
@@ -177,73 +177,83 @@ def delete_workspace(workspace_id):
177
 
178
 
179
  # -------------------------------------------------------------------
180
- # Function to transcribe audio
181
  # -------------------------------------------------------------------
182
- def transcribe_audio(file_url, language="", prompt="get all text from this Audio", temperature=0, bot_id=None, workspace_id=None):
183
  """
184
- Sends an audio file URL to the Botpress API for transcription
185
- Uses bot/workspace management for authentication
186
  """
187
  # Prepare the headers
188
  headers = {
189
  "User-Agent": "Mozilla/5.0",
190
- "Content-Type": "application/json",
191
  "x-bot-id": bot_id,
 
 
192
  "Cookie": AUTH_COOKIE
193
  }
194
 
195
- # Add bot ID to headers if available
196
- if bot_id:
197
- headers["x-bot-id"] = bot_id
198
-
199
- # Prepare the payload for audio transcription
200
  payload = {
201
  "type": "openai:transcribeAudio",
202
  "input": {
203
  "fileUrl": file_url,
204
- "language": language,
205
  "prompt": prompt,
206
- "temperature": temperature
207
  }
208
  }
209
 
210
  botpress_url = "https://api.botpress.cloud/v1/chat/actions"
211
  max_retries = 3
212
- timeout = 180 # Longer timeout for audio processing
213
 
214
- # Flag to track if we need to create new IDs
215
- need_new_ids = False
216
 
217
  # Attempt to send the request
218
  for attempt in range(max_retries):
219
  try:
220
- print(f"Attempt {attempt+1}: Sending transcription request to Botpress API")
221
- if bot_id:
222
- print(f"Using bot_id={bot_id}, workspace_id={workspace_id}")
223
-
224
  response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
225
 
226
  # If successful (200)
227
  if response.status_code == 200:
228
  data = response.json()
229
 
230
- # Extract the transcription text from all segments
 
231
  segments = data.get('output', {}).get('segments', [])
232
- transcription = " ".join([segment.get('text', '').strip() for segment in segments])
 
 
 
233
 
 
234
  print(f"Successfully received transcription from Botpress API")
235
- return transcription, data, bot_id, workspace_id
236
-
237
- # Handle 403/404 errors (authentication/authorization issue)
238
- elif response.status_code in [403, 404]:
239
- print(f"Received {response.status_code} error. Need to create new IDs.")
240
- need_new_ids = True
241
- break
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  # Handle network errors or timeouts (just retry)
244
- elif response.status_code in [443, 408, 502, 503, 504]:
245
  print(f"Received error {response.status_code}. Retrying...")
246
- time.sleep(3)
247
  continue
248
 
249
  # Any other error status code
@@ -253,70 +263,91 @@ def transcribe_audio(file_url, language="", prompt="get all text from this Audio
253
  time.sleep(2)
254
  continue
255
  else:
256
- return f"Unable to transcribe audio (Error {response.status_code}).", None, bot_id, workspace_id
257
 
258
  except requests.exceptions.Timeout:
259
- print(f"Transcription request timed out. Retrying...")
260
  if attempt < max_retries - 1:
261
- time.sleep(3)
262
  continue
263
  else:
264
- return "The transcription is taking too long. Please try with a shorter audio file.", None, bot_id, workspace_id
265
 
266
  except Exception as e:
267
- print(f"Error during transcription request: {str(e)}")
268
  if attempt < max_retries - 1:
269
- time.sleep(3)
270
  continue
271
  else:
272
- return f"Unable to transcribe audio: {str(e)}", None, bot_id, workspace_id
273
 
274
- # If we need new IDs, create them and try again
275
- if need_new_ids:
276
- print("Creating new workspace and bot IDs for transcription...")
277
 
278
- # First, try to clean up old resources
279
  if bot_id and workspace_id:
280
- delete_bot(bot_id, workspace_id)
281
- delete_workspace(workspace_id)
 
 
 
 
 
 
 
 
 
 
 
282
 
283
- # Create new resources
284
  new_workspace_id = create_workspace()
285
  if not new_workspace_id:
286
- return "Failed to create a new workspace for transcription. Please try again later.", None, bot_id, workspace_id
287
 
 
288
  new_bot_id = create_bot(new_workspace_id)
289
  if not new_bot_id:
290
- return "Failed to create a new bot for transcription. Please try again later.", None, new_workspace_id, workspace_id
291
 
292
- # Update headers with new bot ID
293
  headers["x-bot-id"] = new_bot_id
 
294
 
295
  # Try one more time with the new IDs
296
  try:
297
- print(f"Retrying transcription with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
298
  retry_response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
299
 
300
  if retry_response.status_code == 200:
301
  data = retry_response.json()
 
 
 
302
  segments = data.get('output', {}).get('segments', [])
303
- transcription = " ".join([segment.get('text', '').strip() for segment in segments])
 
 
 
 
 
304
  print(f"Successfully received transcription with new IDs")
305
- return transcription, data, new_bot_id, new_workspace_id
306
  else:
307
- print(f"Transcription failed with new IDs: {retry_response.status_code}, {retry_response.text}")
308
- return f"Unable to transcribe audio with new credentials.", None, new_bot_id, new_workspace_id
309
 
310
  except Exception as e:
311
- print(f"Error with new IDs during transcription: {str(e)}")
312
- return f"Unable to transcribe audio with new credentials: {str(e)}", None, new_bot_id, new_workspace_id
313
 
314
  # Fallback: reached when every attempt ended in a retryable status code and the retries ran out
315
- return "Unable to transcribe audio.", None, bot_id, workspace_id
316
 
317
 
318
  # -------------------------------------------------------------------
319
- # Flask Endpoint for Transcription
320
  # -------------------------------------------------------------------
321
  @app.route("/transcribe", methods=["POST"])
322
  def transcribe_endpoint():
@@ -324,15 +355,11 @@ def transcribe_endpoint():
324
  Expects JSON with:
325
  {
326
  "file_url": "string",
327
- "language": "string" (optional),
328
- "prompt": "string" (optional),
329
- "temperature": float (optional),
330
- "include_full_response": boolean (optional)
331
  }
332
  Returns JSON with:
333
  {
334
- "transcription": "string",
335
- "full_response": {} (optional)
336
  }
337
  """
338
  global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
@@ -340,51 +367,38 @@ def transcribe_endpoint():
340
  # Parse JSON from request
341
  data = request.get_json(force=True)
342
  file_url = data.get("file_url", "")
343
- language = data.get("language", "")
344
- prompt = data.get("prompt", "get all text from this Audio")
345
- temperature = data.get("temperature", 0)
346
- include_full_response = data.get("include_full_response", False)
347
 
348
- # Validate input
349
  if not file_url:
350
  return jsonify({"error": "Missing file_url parameter"}), 400
351
 
352
  # If we don't yet have a workspace or bot, create them
353
  if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
354
- print("No existing IDs found for transcription. Creating new workspace and bot...")
355
  GLOBAL_WORKSPACE_ID = create_workspace()
356
  if GLOBAL_WORKSPACE_ID:
357
  GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID)
358
 
359
  # If creation failed
360
  if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
361
- return jsonify({"transcription": "Transcription service is currently unavailable. Please try again later."}), 500
362
 
363
- # Call our function that interacts with Botpress API for transcription
364
  print(f"Sending transcription request with existing bot_id={GLOBAL_BOT_ID}, workspace_id={GLOBAL_WORKSPACE_ID}")
365
- transcription, full_data, updated_bot_id, updated_workspace_id = transcribe_audio(
366
  file_url,
367
- language,
368
  prompt,
369
- temperature,
370
  GLOBAL_BOT_ID,
371
  GLOBAL_WORKSPACE_ID
372
  )
373
 
374
  # Update global IDs if they changed
375
  if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
376
- print(f"Updating global IDs after transcription: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
377
  GLOBAL_BOT_ID = updated_bot_id
378
  GLOBAL_WORKSPACE_ID = updated_workspace_id
379
 
380
- # Prepare the response
381
- response = {"transcription": transcription}
382
-
383
- # Include full response data if requested
384
- if include_full_response and full_data:
385
- response["full_response"] = full_data
386
-
387
- return jsonify(response)
388
 
389
 
390
  # -------------------------------------------------------------------
 
5
  import time
6
 
7
  app = Flask(__name__)
8
+
9
  # Global variables to store workspace and bot IDs
10
  GLOBAL_WORKSPACE_ID = None
11
  GLOBAL_BOT_ID = None
12
 
13
+ # Cookie value used in requests (should be updated with a valid cookie)
14
+ AUTH_COOKIE = ""
 
15
 
16
  # -------------------------------------------------------------------
17
  # Helper functions for random bot/workspace names
 
177
 
178
 
179
  # -------------------------------------------------------------------
180
+ # Main function that calls the Botpress API endpoint for audio transcription
181
  # -------------------------------------------------------------------
182
+ def transcribe_audio(file_url, prompt, bot_id, workspace_id):
183
  """
184
+ Sends the audio file URL to the Botpress API endpoint for transcription,
185
+ returns the transcription text and (possibly updated) bot/workspace IDs.
186
  """
187
  # Prepare the headers
188
  headers = {
189
  "User-Agent": "Mozilla/5.0",
 
190
  "x-bot-id": bot_id,
191
+ "x-workspace-id": workspace_id,
192
+ "Content-Type": "application/json",
193
  "Cookie": AUTH_COOKIE
194
  }
195
 
196
+ # Prepare the payload for the API
 
 
 
 
197
  payload = {
198
  "type": "openai:transcribeAudio",
199
  "input": {
200
  "fileUrl": file_url,
 
201
  "prompt": prompt,
202
+ "temperature": 0
203
  }
204
  }
205
 
206
  botpress_url = "https://api.botpress.cloud/v1/chat/actions"
207
  max_retries = 3
208
+ timeout = 120 # Increased timeout for long audio files
209
 
210
+ # Flag to track if we need to create new IDs due to quota exceeded
211
+ quota_exceeded = False
212
 
213
  # Attempt to send the request
214
  for attempt in range(max_retries):
215
  try:
216
+ print(f"Attempt {attempt+1}: Sending transcription request to Botpress API with bot_id={bot_id}, workspace_id={workspace_id}")
 
 
 
217
  response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
218
 
219
  # If successful (200)
220
  if response.status_code == 200:
221
  data = response.json()
222
 
223
+ # Extract all text segments from the response
224
+ transcription_text = ""
225
  segments = data.get('output', {}).get('segments', [])
226
+ for segment in segments:
227
+ segment_text = segment.get('text', '')
228
+ if segment_text:
229
+ transcription_text += segment_text + " "
230
 
231
+ transcription_text = transcription_text.strip()
232
  print(f"Successfully received transcription from Botpress API")
233
+ return transcription_text, bot_id, workspace_id
234
+
235
+ # Check for quota exceeded error specifically
236
+ elif response.status_code == 403:
237
+ error_data = response.json()
238
+ error_message = error_data.get('message', '')
239
+
240
+ # Check if this is the specific quota exceeded error
241
+ if "has reached its usage limit for ai spend" in error_message:
242
+ print(f"Quota exceeded error detected: {error_message}")
243
+ quota_exceeded = True
244
+ break
245
+ else:
246
+ print(f"Received 403 error but not quota exceeded: {error_message}")
247
+ if attempt < max_retries - 1:
248
+ time.sleep(2)
249
+ continue
250
+ else:
251
+ return f"Unable to transcribe the audio (Error 403).", bot_id, workspace_id
252
 
253
  # Handle network errors or timeouts (just retry)
254
+ elif response.status_code in [404, 408, 502, 503, 504]:
255
  print(f"Received error {response.status_code}. Retrying...")
256
+ time.sleep(3) # Wait before retrying
257
  continue
258
 
259
  # Any other error status code
 
263
  time.sleep(2)
264
  continue
265
  else:
266
+ return f"Unable to transcribe the audio (Error {response.status_code}).", bot_id, workspace_id
267
 
268
  except requests.exceptions.Timeout:
269
+ print(f"Request timed out. Retrying...")
270
  if attempt < max_retries - 1:
271
+ time.sleep(2)
272
  continue
273
  else:
274
+ return "The transcription is taking too long. Please try again with a shorter audio file.", bot_id, workspace_id
275
 
276
  except Exception as e:
277
+ print(f"Error during request: {str(e)}")
278
  if attempt < max_retries - 1:
279
+ time.sleep(2)
280
  continue
281
  else:
282
+ return f"Unable to transcribe the audio: {str(e)}", bot_id, workspace_id
283
 
284
+ # If quota exceeded, we need to create new resources
285
+ if quota_exceeded:
286
+ print("Quota exceeded. Creating new workspace and bot...")
287
 
288
+ # First delete the bot, then the workspace (in that order)
289
  if bot_id and workspace_id:
290
+ print(f"Deleting bot {bot_id} first...")
291
+ delete_success = delete_bot(bot_id, workspace_id)
292
+ if delete_success:
293
+ print(f"Successfully deleted bot {bot_id}")
294
+ else:
295
+ print(f"Failed to delete bot {bot_id}")
296
+
297
+ print(f"Now deleting workspace {workspace_id}...")
298
+ ws_delete_success = delete_workspace(workspace_id)
299
+ if ws_delete_success:
300
+ print(f"Successfully deleted workspace {workspace_id}")
301
+ else:
302
+ print(f"Failed to delete workspace {workspace_id}")
303
 
304
+ # Create new workspace
305
  new_workspace_id = create_workspace()
306
  if not new_workspace_id:
307
+ return "Failed to create a new workspace after quota exceeded. Please try again later.", bot_id, workspace_id
308
 
309
+ # Create new bot in the new workspace
310
  new_bot_id = create_bot(new_workspace_id)
311
  if not new_bot_id:
312
+ return "Failed to create a new bot after quota exceeded. Please try again later.", new_workspace_id, workspace_id
313
 
314
+ # Update headers with new bot ID and workspace ID
315
  headers["x-bot-id"] = new_bot_id
316
+ headers["x-workspace-id"] = new_workspace_id
317
 
318
  # Try one more time with the new IDs
319
  try:
320
+ print(f"Retrying with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
321
  retry_response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
322
 
323
  if retry_response.status_code == 200:
324
  data = retry_response.json()
325
+
326
+ # Extract all text segments from the response
327
+ transcription_text = ""
328
  segments = data.get('output', {}).get('segments', [])
329
+ for segment in segments:
330
+ segment_text = segment.get('text', '')
331
+ if segment_text:
332
+ transcription_text += segment_text + " "
333
+
334
+ transcription_text = transcription_text.strip()
335
  print(f"Successfully received transcription with new IDs")
336
+ return transcription_text, new_bot_id, new_workspace_id
337
  else:
338
+ print(f"Failed with new IDs: {retry_response.status_code}, {retry_response.text}")
339
+ return f"Unable to transcribe the audio with new credentials.", new_bot_id, new_workspace_id
340
 
341
  except Exception as e:
342
+ print(f"Error with new IDs: {str(e)}")
343
+ return f"Unable to transcribe the audio with new credentials: {str(e)}", new_bot_id, new_workspace_id
344
 
345
  # Fallback: reached when every attempt ended in a retryable status code and the retries ran out
346
+ return "Unable to transcribe the audio.", bot_id, workspace_id
347
 
348
 
349
  # -------------------------------------------------------------------
350
+ # Flask Endpoint
351
  # -------------------------------------------------------------------
352
  @app.route("/transcribe", methods=["POST"])
353
  def transcribe_endpoint():
 
355
  Expects JSON with:
356
  {
357
  "file_url": "string",
358
+ "prompt": "string"
 
 
 
359
  }
360
  Returns JSON with:
361
  {
362
+ "transcription": "string"
 
363
  }
364
  """
365
  global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
 
367
  # Parse JSON from request
368
  data = request.get_json(force=True)
369
  file_url = data.get("file_url", "")
370
+ prompt = data.get("prompt", "get all text with his lang and exatract (DON'T translate) .")
 
 
 
371
 
 
372
  if not file_url:
373
  return jsonify({"error": "Missing file_url parameter"}), 400
374
 
375
  # If we don't yet have a workspace or bot, create them
376
  if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
377
+ print("No existing IDs found. Creating new workspace and bot...")
378
  GLOBAL_WORKSPACE_ID = create_workspace()
379
  if GLOBAL_WORKSPACE_ID:
380
  GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID)
381
 
382
  # If creation failed
383
  if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
384
+ return jsonify({"error": "I'm currently unavailable. Please try again later."}), 500
385
 
386
+ # Call our function that interacts with Botpress API
387
  print(f"Sending transcription request with existing bot_id={GLOBAL_BOT_ID}, workspace_id={GLOBAL_WORKSPACE_ID}")
388
+ transcription, updated_bot_id, updated_workspace_id = transcribe_audio(
389
  file_url,
 
390
  prompt,
 
391
  GLOBAL_BOT_ID,
392
  GLOBAL_WORKSPACE_ID
393
  )
394
 
395
  # Update global IDs if they changed
396
  if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
397
+ print(f"Updating global IDs: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
398
  GLOBAL_BOT_ID = updated_bot_id
399
  GLOBAL_WORKSPACE_ID = updated_workspace_id
400
 
401
+ return jsonify({"transcription": transcription})
 
 
 
 
 
 
 
402
 
403
 
404
  # -------------------------------------------------------------------