CORVO-AI committed on
Commit
3747525
·
verified ·
1 Parent(s): 1d7f518

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +237 -77
app.py CHANGED
@@ -3,6 +3,9 @@ import requests
3
  import random
4
  import string
5
  import time
 
 
 
6
 
7
  app = Flask(__name__)
8
 
@@ -176,6 +179,127 @@ def delete_workspace(workspace_id):
176
  return False
177
 
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  # -------------------------------------------------------------------
180
  # Main function that calls the Botpress API endpoint for audio transcription
181
  # -------------------------------------------------------------------
@@ -184,6 +308,8 @@ def transcribe_audio(file_url, prompt, bot_id, workspace_id):
184
  Sends the audio file URL to the Botpress API endpoint for transcription,
185
  returns the transcription text and (possibly updated) bot/workspace IDs.
186
  """
 
 
187
  # Prepare the headers
188
  headers = {
189
  "User-Agent": "Mozilla/5.0",
@@ -207,9 +333,6 @@ def transcribe_audio(file_url, prompt, bot_id, workspace_id):
207
  max_retries = 3
208
  timeout = 120 # Increased timeout for long audio files
209
 
210
- # Flag to track if we need to create new IDs due to quota exceeded
211
- quota_exceeded = False
212
-
213
  # Attempt to send the request
214
  for attempt in range(max_retries):
215
  try:
@@ -232,23 +355,10 @@ def transcribe_audio(file_url, prompt, bot_id, workspace_id):
232
  print(f"Successfully received transcription from Botpress API")
233
  return transcription_text, bot_id, workspace_id
234
 
235
- # Check for quota exceeded error specifically
236
  elif response.status_code == 403:
237
- error_data = response.json()
238
- error_message = error_data.get('message', '')
239
-
240
- # Check if this is the specific quota exceeded error
241
- if "has reached its usage limit for ai spend" in error_message:
242
- print(f"Quota exceeded error detected: {error_message}")
243
- quota_exceeded = True
244
- break
245
- else:
246
- print(f"Received 403 error but not quota exceeded: {error_message}")
247
- if attempt < max_retries - 1:
248
- time.sleep(2)
249
- continue
250
- else:
251
- return f"Unable to transcribe the audio (Error 403).", bot_id, workspace_id
252
 
253
  # Handle network errors or timeouts (just retry)
254
  elif response.status_code in [404, 408, 502, 503, 504]:
@@ -281,73 +391,70 @@ def transcribe_audio(file_url, prompt, bot_id, workspace_id):
281
  else:
282
  return f"Unable to transcribe the audio: {str(e)}", bot_id, workspace_id
283
 
284
- # If quota exceeded, we need to create new resources
285
- if quota_exceeded:
286
- print("Quota exceeded. Creating new workspace and bot...")
287
-
288
- # First delete the bot, then the workspace (in that order)
289
- if bot_id and workspace_id:
290
- print(f"Deleting bot {bot_id} first...")
291
- delete_success = delete_bot(bot_id, workspace_id)
292
- if delete_success:
293
- print(f"Successfully deleted bot {bot_id}")
294
- else:
295
- print(f"Failed to delete bot {bot_id}")
296
-
297
- print(f"Now deleting workspace {workspace_id}...")
298
- ws_delete_success = delete_workspace(workspace_id)
299
- if ws_delete_success:
300
- print(f"Successfully deleted workspace {workspace_id}")
301
- else:
302
- print(f"Failed to delete workspace {workspace_id}")
303
-
304
- # Create new workspace
305
- new_workspace_id = create_workspace()
306
- if not new_workspace_id:
307
- return "Failed to create a new workspace after quota exceeded. Please try again later.", bot_id, workspace_id
308
-
309
- # Create new bot in the new workspace
310
- new_bot_id = create_bot(new_workspace_id)
311
- if not new_bot_id:
312
- return "Failed to create a new bot after quota exceeded. Please try again later.", new_workspace_id, workspace_id
313
-
314
- # Update headers with new bot ID and workspace ID
315
- headers["x-bot-id"] = new_bot_id
316
- headers["x-workspace-id"] = new_workspace_id
317
 
318
- # Try one more time with the new IDs
319
- try:
320
- print(f"Retrying with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
321
- retry_response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
 
 
322
 
323
- if retry_response.status_code == 200:
324
- data = retry_response.json()
 
 
325
 
326
- # Extract all text segments from the response
327
- transcription_text = ""
328
- segments = data.get('output', {}).get('segments', [])
329
- for segment in segments:
330
- segment_text = segment.get('text', '')
331
- if segment_text:
332
- transcription_text += segment_text + " "
333
 
334
- transcription_text = transcription_text.strip()
335
- print(f"Successfully received transcription with new IDs")
336
- return transcription_text, new_bot_id, new_workspace_id
337
- else:
338
- print(f"Failed with new IDs: {retry_response.status_code}, {retry_response.text}")
339
- return f"Unable to transcribe the audio with new credentials.", new_bot_id, new_workspace_id
340
 
341
- except Exception as e:
342
- print(f"Error with new IDs: {str(e)}")
343
- return f"Unable to transcribe the audio with new credentials: {str(e)}", new_bot_id, new_workspace_id
344
 
345
- # Should not reach here due to the handling in the loop
346
- return "Unable to transcribe the audio.", bot_id, workspace_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
 
 
 
348
 
349
  # -------------------------------------------------------------------
350
- # Flask Endpoint
351
  # -------------------------------------------------------------------
352
  @app.route("/transcribe", methods=["POST"])
353
  def transcribe_endpoint():
@@ -401,6 +508,59 @@ def transcribe_endpoint():
401
  return jsonify({"transcription": transcription})
402
 
403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  # -------------------------------------------------------------------
405
  # Run the Flask app
406
  # -------------------------------------------------------------------
 
3
  import random
4
  import string
5
  import time
6
+ import os
7
+ import json
8
+ from datetime import datetime, timedelta
9
 
10
  app = Flask(__name__)
11
 
 
179
  return False
180
 
181
 
182
+ # -------------------------------------------------------------------
183
+ # Function to upload audio file and get URL
184
+ # -------------------------------------------------------------------
185
def upload_audio_file(file_path, bot_id, workspace_id, _retry_on_403=True):
    """Upload a local audio file through the Botpress files API and return its URL.

    The upload is a two-step flow: a PUT to ``/v1/files`` registers the file
    and returns a ``url`` plus an ``uploadUrl``; the file bytes are then PUT
    to ``uploadUrl``.

    If the registration request is answered with HTTP 403, the current bot
    and workspace are deleted, a new pair is created, the module-level
    ``GLOBAL_WORKSPACE_ID`` / ``GLOBAL_BOT_ID`` are updated and the upload is
    retried exactly once with the new IDs.

    Args:
        file_path: Path of the audio file on local disk.
        bot_id: Bot ID sent in the ``x-bot-id`` header.
        workspace_id: Workspace ID sent in the ``x-workspace-id`` header.
        _retry_on_403: Internal guard. The recursive retry passes ``False``
            so a second 403 is reported instead of recreating resources
            forever. Callers should leave it at its default.

    Returns:
        A ``(result, bot_id, workspace_id)`` tuple. On success ``result`` is
        the file URL. On failure it is a message starting with ``"Error"``,
        ``"Failed"`` or ``"No upload"`` (the caller detects errors by these
        prefixes). The IDs are the ones that are current after the call; the
        bot ID is ``None`` when a replacement workspace was created but its
        bot could not be.

    Raises:
        OSError: If ``file_path`` cannot be stat'ed or read.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID

    # Local import: the module header only brings in `datetime` and `timedelta`.
    from datetime import timezone

    # Same budget the transcription request uses; without a timeout a stalled
    # connection would block the worker indefinitely.
    request_timeout = 120

    url = "https://api.botpress.cloud/v1/files"
    file_name = os.path.basename(file_path)
    file_size = os.path.getsize(file_path)
    content_type = "audio/mpeg"

    # Expire three days from now. Use an aware UTC timestamp so the offset in
    # the string is always true, instead of labelling local time as "+02:00".
    expires_at = (datetime.now(timezone.utc) + timedelta(days=3)).isoformat(
        timespec="milliseconds"
    )

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
        "Content-Type": "application/json",
        "cookie": AUTH_COOKIE,
    }

    payload = {
        "key": file_name,
        "tags": {
            "purpose": "emulator",
            "system": "true",
        },
        "size": file_size,
        "accessPolicies": ["public_content"],
        "contentType": content_type,
        "expiresAt": expires_at,
        "publicContentImmediatelyAccessible": True,
    }

    # Step 1: register the file and obtain the upload URL.
    try:
        response = requests.put(
            url,
            headers=headers,
            data=json.dumps(payload),
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        return f"Error: file registration request failed: {exc}", bot_id, workspace_id

    if response.status_code == 403 and _retry_on_403:
        print("Received 403 error during file upload. Recreating bot and workspace...")

        # Delete bot first, then workspace
        if bot_id and workspace_id:
            delete_bot(bot_id, workspace_id)
            delete_workspace(workspace_id)

        new_workspace_id = create_workspace()
        if not new_workspace_id:
            return "Failed to create a new workspace after 403 error", bot_id, workspace_id

        new_bot_id = create_bot(new_workspace_id)
        if not new_bot_id:
            # Report the real state: there is a new workspace but no bot.
            # (Previously the workspace ID was returned in the bot-ID slot.)
            return "Failed to create a new bot after 403 error", None, new_workspace_id

        GLOBAL_WORKSPACE_ID = new_workspace_id
        GLOBAL_BOT_ID = new_bot_id

        # Retry once with the new IDs; a second 403 is returned as an error.
        result, _, _ = upload_audio_file(
            file_path, new_bot_id, new_workspace_id, _retry_on_403=False
        )
        return result, new_bot_id, new_workspace_id

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}", bot_id, workspace_id

    try:
        file_info = response.json().get("file", {})
    except ValueError as exc:
        return f"Error: invalid JSON in file registration response: {exc}", bot_id, workspace_id

    file_url = file_info.get("url", "")
    upload_url = file_info.get("uploadUrl", "")
    if not upload_url:
        return "No upload URL provided in response", bot_id, workspace_id

    # Step 2: send the raw bytes to the upload URL.
    with open(file_path, 'rb') as audio_file:
        file_content = audio_file.read()

    # Browser-like headers. Host and Content-Length are deliberately left to
    # `requests`, which derives them from the URL and the body, so they can
    # never disagree with what is actually sent.
    upload_headers = {
        "accept": "application/json, text/plain, */*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "en-US,en;q=0.9,ar;q=0.8",
        "connection": "keep-alive",
        "content-type": "audio/mpeg",
        "origin": "https://studio.botpress.cloud",
        "referer": "https://studio.botpress.cloud/",
        "sec-ch-ua": '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "cross-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "x-amz-tagging": "public=true",
    }

    try:
        upload_response = requests.put(
            upload_url,
            headers=upload_headers,
            data=file_content,
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        return f"Error uploading file: {exc}", bot_id, workspace_id

    if upload_response.status_code == 200:
        return file_url, bot_id, workspace_id
    return f"Error uploading file: {upload_response.status_code} - {upload_response.text}", bot_id, workspace_id
301
+
302
+
303
  # -------------------------------------------------------------------
304
  # Main function that calls the Botpress API endpoint for audio transcription
305
  # -------------------------------------------------------------------
 
308
  Sends the audio file URL to the Botpress API endpoint for transcription,
309
  returns the transcription text and (possibly updated) bot/workspace IDs.
310
  """
311
+ global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
312
+
313
  # Prepare the headers
314
  headers = {
315
  "User-Agent": "Mozilla/5.0",
 
333
  max_retries = 3
334
  timeout = 120 # Increased timeout for long audio files
335
 
 
 
 
336
  # Attempt to send the request
337
  for attempt in range(max_retries):
338
  try:
 
355
  print(f"Successfully received transcription from Botpress API")
356
  return transcription_text, bot_id, workspace_id
357
 
358
+ # If we get a 403 error, delete and recreate workspace/bot
359
  elif response.status_code == 403:
360
+ print(f"Received 403 error. Deleting and recreating workspace/bot...")
361
+ break # Break out of the retry loop to handle 403 specially
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
  # Handle network errors or timeouts (just retry)
364
  elif response.status_code in [404, 408, 502, 503, 504]:
 
391
  else:
392
  return f"Unable to transcribe the audio: {str(e)}", bot_id, workspace_id
393
 
394
+ # If we got a 403 error, delete and recreate resources
395
+ # First delete the bot, then the workspace (in that order)
396
+ if bot_id and workspace_id:
397
+ print(f"Deleting bot {bot_id} first...")
398
+ delete_success = delete_bot(bot_id, workspace_id)
399
+ if delete_success:
400
+ print(f"Successfully deleted bot {bot_id}")
401
+ else:
402
+ print(f"Failed to delete bot {bot_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
 
404
+ print(f"Now deleting workspace {workspace_id}...")
405
+ ws_delete_success = delete_workspace(workspace_id)
406
+ if ws_delete_success:
407
+ print(f"Successfully deleted workspace {workspace_id}")
408
+ else:
409
+ print(f"Failed to delete workspace {workspace_id}")
410
 
411
+ # Create new workspace
412
+ new_workspace_id = create_workspace()
413
+ if not new_workspace_id:
414
+ return "Failed to create a new workspace. Please try again later.", bot_id, workspace_id
415
 
416
+ # Create new bot in the new workspace
417
+ new_bot_id = create_bot(new_workspace_id)
418
+ if not new_bot_id:
419
+ return "Failed to create a new bot. Please try again later.", new_workspace_id, workspace_id
 
 
 
420
 
421
+ # Update global variables
422
+ GLOBAL_WORKSPACE_ID = new_workspace_id
423
+ GLOBAL_BOT_ID = new_bot_id
 
 
 
424
 
425
+ # Update headers with new bot ID and workspace ID
426
+ headers["x-bot-id"] = new_bot_id
427
+ headers["x-workspace-id"] = new_workspace_id
428
 
429
+ # Try one more time with the new IDs
430
+ try:
431
+ print(f"Retrying with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
432
+ retry_response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
433
+
434
+ if retry_response.status_code == 200:
435
+ data = retry_response.json()
436
+
437
+ # Extract all text segments from the response
438
+ transcription_text = ""
439
+ segments = data.get('output', {}).get('segments', [])
440
+ for segment in segments:
441
+ segment_text = segment.get('text', '')
442
+ if segment_text:
443
+ transcription_text += segment_text + " "
444
+
445
+ transcription_text = transcription_text.strip()
446
+ print(f"Successfully received transcription with new IDs")
447
+ return transcription_text, new_bot_id, new_workspace_id
448
+ else:
449
+ print(f"Failed with new IDs: {retry_response.status_code}, {retry_response.text}")
450
+ return f"Unable to transcribe the audio with new credentials.", new_bot_id, new_workspace_id
451
 
452
+ except Exception as e:
453
+ print(f"Error with new IDs: {str(e)}")
454
+ return f"Unable to transcribe the audio with new credentials: {str(e)}", new_bot_id, new_workspace_id
455
 
456
  # -------------------------------------------------------------------
457
+ # Flask Endpoints
458
  # -------------------------------------------------------------------
459
  @app.route("/transcribe", methods=["POST"])
460
  def transcribe_endpoint():
 
508
  return jsonify({"transcription": transcription})
509
 
510
 
511
@app.route("/upload", methods=["POST"])
def upload_endpoint():
    """Upload an audio file and return its public URL.

    Expects multipart form data with a file field named ``audio``.

    Returns:
        JSON ``{"file_url": ...}`` on success; JSON ``{"error": ...}`` with
        status 400 when the file or its name is missing/invalid, or 500 when
        the workspace/bot cannot be created or the upload fails.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID

    # Local import keeps this block self-contained; only used for the
    # per-request scratch directory below.
    import tempfile

    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']

    # Covers both an empty string and a missing (None) filename.
    if not audio_file.filename:
        return jsonify({"error": "No audio file selected"}), 400

    # The filename is client-controlled: keep only its last path component so
    # values such as "../../etc/passwd" cannot escape the scratch directory.
    safe_name = os.path.basename(
        audio_file.filename.replace("\\", "/").replace("\x00", "")
    )
    if safe_name in ("", ".", ".."):
        return jsonify({"error": "Invalid audio file name"}), 400

    # If we don't yet have a workspace or bot, create them
    if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
        print("No existing IDs found. Creating new workspace and bot...")
        GLOBAL_WORKSPACE_ID = create_workspace()
        if GLOBAL_WORKSPACE_ID:
            GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID)

        # If creation failed
        if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
            return jsonify({"error": "I'm currently unavailable. Please try again later."}), 500

    # A private directory per request avoids collisions between concurrent
    # uploads that share a filename, and the context manager removes the file
    # even if the upload raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, safe_name)
        audio_file.save(temp_path)
        file_url, updated_bot_id, updated_workspace_id = upload_audio_file(
            temp_path, GLOBAL_BOT_ID, GLOBAL_WORKSPACE_ID
        )

    # Update global IDs if they changed
    if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
        print(f"Updating global IDs: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
        GLOBAL_BOT_ID = updated_bot_id
        GLOBAL_WORKSPACE_ID = updated_workspace_id

    # upload_audio_file reports failures as strings with these prefixes.
    if isinstance(file_url, str) and file_url.startswith(("Error", "Failed", "No upload")):
        return jsonify({"error": file_url}), 500

    # An empty URL is not a usable result; do not report it as success.
    if not file_url:
        return jsonify({"error": "No file URL returned by upload"}), 500

    return jsonify({"file_url": file_url})
562
+
563
+
564
  # -------------------------------------------------------------------
565
  # Run the Flask app
566
  # -------------------------------------------------------------------