"""Flask service that proxies audio upload and transcription to Botpress Cloud.

The service lazily creates a Botpress workspace and bot, caches their IDs in
module-level globals, and recreates both when the API answers 403.

Endpoints:
    POST /upload      multipart form with an ``audio`` file -> ``{"file_url": ...}``
    POST /transcribe  JSON ``{"file_url": ..., "prompt": ...}`` -> ``{"transcription": ...}``
"""

import json
import os
import random
import string
import tempfile
import time
from datetime import datetime, timedelta, timezone

import requests
from flask import Flask, jsonify, request

app = Flask(__name__)

# Global variables to store workspace and bot IDs
GLOBAL_WORKSPACE_ID = None
GLOBAL_BOT_ID = None

# Seconds to wait on Botpress admin calls; without a timeout `requests`
# blocks indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30
# Seconds to wait for the file upload and for the transcription action.
UPLOAD_TIMEOUT = 120
TRANSCRIBE_TIMEOUT = 120  # Increased timeout for long audio files
# Status codes that transcribe_audio treats as transient and simply retries.
RETRYABLE_STATUSES = frozenset({404, 408, 502, 503, 504})

# Fallback cookie value used in requests (should be updated with a valid cookie).
# NOTE(review): this is a session credential committed to source; prefer
# supplying it through the BOTPRESS_AUTH_COOKIE environment variable.
_DEFAULT_AUTH_COOKIE = (
    "pscd=try.botpress.com; "
    "intercom-device-id-bjzkw2xf=de9d7f9e-fc25-4e60-ab1e-1faefc59cb9d; "
    "hubspotutk=5e2dc85f440e44366573a18c08710b4b; "
    "_hjSessionUser_3339867=eyJpZCI6ImU1Njc4OWU2LWNlNWYtNWI5ZC05ZWI4LTAwNDgyZDE0OTE2ZiIsImNyZWF0ZWQiOjE3NDkyMjE5NTc1NjAsImV4aXN0aW5nIjp0cnVlfQ==; "
    "_hjSessionUser_2931810=eyJpZCI6IjA4MWIzNmJiLTUwNGEtNTkzMi05MTk2LWZhYzg3YjIwNGI4MiIsImNyZWF0ZWQiOjE3NDkyMjE5NTc1NzUsImV4aXN0aW5nIjp0cnVlfQ==; "
    "csrf_token_bd9ac21c34b9f0915e733c3e5305d737d0722c1168be7376b889426b5ec2a298=pNVlTs/ub+CA07dGj1K0YqdpRJtMfb4qo56D9yY1g4s=; "
    "ory_kratos_session=MTc0OTQ4NzUxN3w0YnNMRmloWTZvYVM1ZzN2WU5ERW9kcmtLQ0NYLUlxbnhmMDZVaW1YUGl5ZFlhQ1RYTnhnWDlQNHNXakxqbkk0a0QwV0xhS2d0dHRBNWhUaUg2NlpkaHJzeEpnNFVzV3RNYnkxNDBSXzAxa0YwNkRhLVprb295WWpkRFp2ekRyU0sweUdtTEJLbDM4LWhXNHZNb1N5T3RURGM0MjY4YmFTYnhESjVFOGZFaVZrYkI5SkxMZjF1VDdwQkhnazNwRngxeUtpYW45MFY3dU9OUUpmc3pHM3p4aXdnMTdhRlFubF9QUEFhb1BqV21aUXRGWEcxMEx1UVVVbWZVZW43LUZTczVkdGxBbi1ya2N3bWRMTldwdkZkZz09fOqIWWhw3VVcaLHm46FguqaIPVZcO0Fi1ETNLhgenNrf; "
    "ajs_user_id=d403ad7b-ea73-4d29-b977-5fd95afd585c; "
    "_hjSession_2931810=eyJpZCI6Ijg0Y2VkNTU0LTg1ZGMtNDgwZC04MDg4LTgwOWIyODBjZTM2OCIsImMiOjE3NDk3MTMxNTUxMDgsInMiOjAsInIiOjAsInNiIjowLCJzciI6MCwic2UiOjAsImZzIjowLCJzcCI6MX0=; "
    "_ga=GA1.2.637218704.1749221961; "
    "_gid=GA1.2.215266273.1749713156; "
    "__hstc=59821234.5e2dc85f440e44366573a18c08710b4b.1749221970532.1749501217019.1749713156411.4; "
    "__hssrc=1; "
    "__hssc=59821234.1.1749713156411; "
    "_ga_W6YT9YSNLH=GS2.2.s1749713156$o4$g0$t1749713156$j60$l0$h0; "
    "_ga_CYSS87Q508=GS2.2.s1749713157$o4$g0$t1749713157$j60$l0$h0; "
    "_ga_PCC6TBWJY6=GS2.1.s1749713155$o181$g1$t1749713158$j57$l0$h0; "
    "_ga_HKHSWES9V9=GS2.1.s1749713155$o181$g1$t1749713158$j57$l0$h1340410281; "
    "_hjSession_3339867=eyJpZCI6Ijc3MGI3YWRhLTdkZjctNDk2OS1iMTdlLWQzMjYwZGZkNWEzMSIsImMiOjE3NDk3MTMxNzE0NzIsInMiOjAsInIiOjAsInNiIjowLCJzciI6MCwic2UiOjAsImZzIjowLCJzcCI6MX0=; "
    "mp_1195923e954ce61d822842b5832047cd_mixpanel=%7B%22distinct_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%2C%22%24device_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%2C%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fapp.botpress.cloud%2F%22%2C%22%24initial_referring_domain%22%3A%20%22app.botpress.cloud%22%2C%22__mps%22%3A%20%7B%7D%2C%22__mpso%22%3A%20%7B%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fapp.botpress.cloud%2F%22%2C%22%24initial_referring_domain%22%3A%20%22app.botpress.cloud%22%7D%2C%22__mpus%22%3A%20%7B%7D%2C%22__mpa%22%3A%20%7B%7D%2C%22__mpu%22%3A%20%7B%7D%2C%22__mpr%22%3A%20%5B%5D%2C%22__mpap%22%3A%20%5B%5D%7D; "
    "ajs_anonymous_id=d88d613c-7f3f-440f-a767-f1bcdc2a2b04; "
    "intercom-session-bjzkw2xf=Q0lxL0k1MUhHaHJzVUF2bk9FQ3Nzd2gyakdIQ2RtU2tsOXozM0h0OEJ4Nmt5WG9NeHB4U3ZPLzRFQTQ4ZGg0RUZIK1VaYXU1MXdIS0hvZ1NyTU9LT2E3VEt2TEJzZXJlOHZiRFhlN1R6Vmc9LS1OdGtGZFk1Rng2cXVSOFFTUjlVUkFRPT0=--be4fc714d16f6f84e101f87166a5c0243842a35e"
)

# Cookie value sent with every Botpress request. The environment variable,
# when set, takes precedence over the value embedded above.
AUTH_COOKIE = os.environ.get("BOTPRESS_AUTH_COOKIE", _DEFAULT_AUTH_COOKIE)


# -------------------------------------------------------------------
# Helper functions for random bot/workspace names
# -------------------------------------------------------------------
def generate_random_name(length=5):
    """Generate a random name of ``length`` ASCII letters for a workspace or bot."""
    return ''.join(random.choices(string.ascii_letters, k=length))


# -------------------------------------------------------------------
# Functions to create/delete workspaces and bots
# -------------------------------------------------------------------
def create_workspace():
    """Create a new workspace with a random name.

    Returns:
        The value of the ``id`` field of the JSON response when the API
        answers 200, otherwise ``None``. Failures are printed, never raised.
    """
    ws_url = "https://api.botpress.cloud/v1/admin/workspaces"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Cookie": AUTH_COOKIE,
    }
    payload = {"name": generate_random_name()}

    try:
        response = requests.post(
            ws_url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            print(f"Workspace creation failed with: {response.status_code}, {response.text}")
            return None
        workspace_id = response.json().get('id')
        print(f"Successfully created workspace: {workspace_id}")
        return workspace_id
    except Exception as e:
        # Deliberate best-effort boundary: callers only check for None.
        print(f"Error creating workspace: {str(e)}")
        return None


def create_bot(workspace_id):
    """Create a new bot in ``workspace_id`` and install its integration.

    Args:
        workspace_id: ID of the workspace that will own the bot.

    Returns:
        The new bot ID (read from ``bot.id`` in the JSON response), or
        ``None`` when no workspace ID was given, the request failed, or the
        response carried no bot ID. The bot ID is still returned when the
        follow-up integration install fails.
    """
    if not workspace_id:
        print("Cannot create bot: No workspace ID provided")
        return None

    bot_url = "https://api.botpress.cloud/v1/admin/bots"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "x-workspace-id": workspace_id,
        "Cookie": AUTH_COOKIE,
        "Content-Type": "application/json",
    }
    payload = {"name": generate_random_name()}

    try:
        response = requests.post(
            bot_url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            print(f"Bot creation failed with: {response.status_code}, {response.text}")
            return None

        bot_id = response.json().get("bot", {}).get("id")
        if not bot_id:
            print("Bot ID not found in the response.")
            return None
        print(f"Successfully created bot: {bot_id} in workspace: {workspace_id}")

        # Install integration for the new bot
        if install_bot_integration(bot_id, workspace_id):
            print(f"Successfully installed integration for bot {bot_id}")
        else:
            print(f"Failed to install integration for bot {bot_id}")
        # Still return the bot ID even if integration fails
        return bot_id
    except Exception as e:
        # Deliberate best-effort boundary: callers only check for None.
        print(f"Error creating bot: {str(e)}")
        return None


def install_bot_integration(bot_id, workspace_id):
    """Enable the hard-coded integration version on the given bot.

    Args:
        bot_id: ID of the bot to update.
        workspace_id: ID of the workspace that owns the bot.

    Returns:
        ``True`` when the API answers 200, ``False`` otherwise (including
        missing IDs and request errors).
    """
    if not bot_id or not workspace_id:
        print("Cannot install integration: Missing bot ID or workspace ID")
        return False

    url = f"https://api.botpress.cloud/v1/admin/bots/{bot_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Cookie": AUTH_COOKIE,
        "Content-Type": "application/json",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
    }
    # Integration payload
    payload = {
        "integrations": {
            "intver_01JSERFV00FYHW4SM6TEKZ9RWS": {
                "enabled": True
            }
        }
    }

    try:
        response = requests.put(
            url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
        if response.status_code == 200:
            print(f"Successfully installed integration for bot {bot_id}")
            return True
        print(f"Failed to install integration: {response.status_code}, {response.text}")
        return False
    except Exception as e:
        print(f"Error installing integration: {str(e)}")
        return False


def delete_bot(bot_id, workspace_id):
    """Delete a bot from the specified workspace.

    Returns:
        ``True`` when the API answers 200 or 204, ``False`` otherwise
        (including missing IDs and request errors).
    """
    if not bot_id or not workspace_id:
        print("Cannot delete bot: Missing bot ID or workspace ID")
        return False

    url = f"https://api.botpress.cloud/v1/admin/bots/{bot_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "x-workspace-id": workspace_id,
        "Cookie": AUTH_COOKIE,
    }

    try:
        response = requests.delete(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code in (200, 204):
            print(f"Successfully deleted bot: {bot_id}")
            return True
        print(f"Failed to delete bot: {response.status_code}, {response.text}")
        return False
    except Exception as e:
        print(f"Error deleting bot: {str(e)}")
        return False


def delete_workspace(workspace_id):
    """Delete a workspace.

    Returns:
        ``True`` when the API answers 200 or 204, ``False`` otherwise
        (including a missing ID and request errors).
    """
    if not workspace_id:
        print("Cannot delete workspace: No workspace ID provided")
        return False

    url = f"https://api.botpress.cloud/v1/admin/workspaces/{workspace_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Cookie": AUTH_COOKIE,
    }

    try:
        response = requests.delete(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code in (200, 204):
            print(f"Successfully deleted workspace: {workspace_id}")
            return True
        print(f"Failed to delete workspace: {response.status_code}, {response.text}")
        return False
    except Exception as e:
        print(f"Error deleting workspace: {str(e)}")
        return False


def _provision_bot_and_workspace():
    """Create a fresh workspace and a bot inside it.

    Returns:
        ``(bot_id, workspace_id, failed_step)``. On success ``failed_step`` is
        ``None``. On failure both IDs are ``None`` and ``failed_step`` is
        ``"workspace"`` or ``"bot"``; a workspace whose bot could not be
        created is deleted again so it is not left orphaned.
    """
    workspace_id = create_workspace()
    if not workspace_id:
        return None, None, "workspace"

    bot_id = create_bot(workspace_id)
    if not bot_id:
        delete_workspace(workspace_id)
        return None, None, "bot"

    return bot_id, workspace_id, None


def _recreate_bot_and_workspace(bot_id, workspace_id):
    """Delete the given bot/workspace (best effort) and provision new ones.

    The bot is deleted before its workspace. On success the module-level
    ``GLOBAL_BOT_ID`` / ``GLOBAL_WORKSPACE_ID`` are updated to the new IDs.

    Returns:
        Same triple as ``_provision_bot_and_workspace``.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID

    # Delete bot first, then workspace
    if bot_id and workspace_id:
        delete_bot(bot_id, workspace_id)
        delete_workspace(workspace_id)

    new_bot_id, new_workspace_id, failed_step = _provision_bot_and_workspace()
    if failed_step is None:
        GLOBAL_WORKSPACE_ID = new_workspace_id
        GLOBAL_BOT_ID = new_bot_id
    return new_bot_id, new_workspace_id, failed_step


def _ensure_bot_and_workspace():
    """Make sure the global workspace and bot IDs are populated.

    Returns:
        ``True`` when both globals hold an ID after the call, else ``False``.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID

    if GLOBAL_WORKSPACE_ID and GLOBAL_BOT_ID:
        return True

    print("No existing IDs found. Creating new workspace and bot...")
    GLOBAL_BOT_ID, GLOBAL_WORKSPACE_ID, failed_step = _provision_bot_and_workspace()
    return failed_step is None


# -------------------------------------------------------------------
# Function to upload audio file and get URL
# -------------------------------------------------------------------
def upload_audio_file(file_path, bot_id, workspace_id, _retry_on_403=True):
    """Upload an audio file through the Botpress files API.

    The function first registers the file (PUT ``/v1/files``) to obtain an
    upload URL, then PUTs the file bytes to that URL. On a 403 from the
    registration call the bot and workspace are recreated and the upload is
    attempted once more.

    Args:
        file_path: Path of the local file; its basename becomes the file key.
        bot_id: Bot ID sent in the ``x-bot-id`` header.
        workspace_id: Workspace ID sent in the ``x-workspace-id`` header.
        _retry_on_403: Internal guard; ``False`` on the recursive call so a
            persistent 403 cannot trigger endless delete/create cycles.

    Returns:
        ``(result, bot_id, workspace_id)``. ``result`` is the file URL on
        success, otherwise a message starting with ``"Error"``, ``"Failed"``
        or ``"No upload"``. The IDs are the ones the caller should keep using;
        both are ``None`` when recreation failed and nothing usable remains.
    """
    # API endpoint
    url = "https://api.botpress.cloud/v1/files"

    try:
        # File key and size as sent to the API
        file_name = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
    except OSError as e:
        return f"Error reading file: {e}", bot_id, workspace_id

    content_type = "audio/mpeg"

    # Expiration date 3 days from now. A timezone-aware UTC timestamp is
    # used so the offset in the string always matches the clock value.
    expires_at = (
        datetime.now(timezone.utc) + timedelta(days=3)
    ).isoformat(timespec="milliseconds")

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
        "Content-Type": "application/json",
        "cookie": AUTH_COOKIE,
    }
    payload = {
        "key": file_name,
        "tags": {
            "purpose": "emulator",
            "system": "true"
        },
        "size": file_size,
        "accessPolicies": ["public_content"],
        "contentType": content_type,
        "expiresAt": expires_at,
        "publicContentImmediatelyAccessible": True,
    }

    # Make the PUT request to get the upload URL
    try:
        response = requests.put(
            url, headers=headers, data=json.dumps(payload), timeout=REQUEST_TIMEOUT
        )
    except requests.exceptions.RequestException as e:
        return f"Error uploading file: {e}", bot_id, workspace_id

    if response.status_code == 200:
        try:
            file_info = response.json().get("file", {})
        except ValueError as e:
            return f"Error: invalid response from file API - {e}", bot_id, workspace_id
        file_url = file_info.get("url", "")
        upload_url = file_info.get("uploadUrl", "")

        if not upload_url:
            return "No upload URL provided in response", bot_id, workspace_id

        # Headers for the upload request, mirroring a browser upload
        upload_headers = {
            "accept": "application/json, text/plain, */*",
            "accept-encoding": "gzip, deflate, br, zstd",
            "accept-language": "en-US,en;q=0.9,ar;q=0.8",
            "connection": "keep-alive",
            "content-length": str(file_size),
            "content-type": "audio/mpeg",
            "host": "s3.us-east-1.amazonaws.com",
            "origin": "https://studio.botpress.cloud",
            "referer": "https://studio.botpress.cloud/",
            "sec-ch-ua": '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "cross-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
            "x-amz-tagging": "public=true",
        }

        try:
            # Read the binary content of the audio file and upload it
            with open(file_path, 'rb') as audio_file:
                file_content = audio_file.read()
            upload_response = requests.put(
                upload_url,
                headers=upload_headers,
                data=file_content,
                timeout=UPLOAD_TIMEOUT,
            )
        except (OSError, requests.exceptions.RequestException) as e:
            return f"Error uploading file: {e}", bot_id, workspace_id

        if upload_response.status_code == 200:
            return file_url, bot_id, workspace_id
        return (
            f"Error uploading file: {upload_response.status_code} - {upload_response.text}",
            bot_id,
            workspace_id,
        )

    if response.status_code == 403 and _retry_on_403:
        # If we get a 403 error, we need to delete and recreate the bot and workspace
        print("Received 403 error during file upload. Recreating bot and workspace...")
        new_bot_id, new_workspace_id, failed_step = _recreate_bot_and_workspace(
            bot_id, workspace_id
        )
        # The old resources were deleted, so on failure report "no IDs"
        # rather than handing back stale or mismatched ones.
        if failed_step == "workspace":
            return "Failed to create a new workspace after 403 error", None, None
        if failed_step == "bot":
            return "Failed to create a new bot after 403 error", None, None

        # Try again with the new IDs (once only)
        result, _, _ = upload_audio_file(
            file_path, new_bot_id, new_workspace_id, _retry_on_403=False
        )
        return result, new_bot_id, new_workspace_id

    return f"Error: {response.status_code} - {response.text}", bot_id, workspace_id


# -------------------------------------------------------------------
# Main function that calls the Botpress API endpoint for audio transcription
# -------------------------------------------------------------------
def _extract_transcription(data):
    """Join the non-empty ``text`` of every entry in ``data["output"]["segments"]``."""
    segments = data.get('output', {}).get('segments', [])
    texts = (segment.get('text', '') for segment in segments)
    return " ".join(text for text in texts if text).strip()


def transcribe_audio(file_url, prompt, bot_id, workspace_id):
    """Send an audio file URL to the Botpress transcription action.

    Up to three attempts are made. A 403 stops the attempts and triggers a
    delete/recreate of the bot and workspace followed by one final attempt
    with the new IDs. Any other failure that survives all attempts is
    reported to the caller without touching the bot or workspace.

    Args:
        file_url: URL of the audio file to transcribe.
        prompt: Prompt forwarded to the transcription action.
        bot_id: Bot ID sent in the ``x-bot-id`` header.
        workspace_id: Workspace ID sent in the ``x-workspace-id`` header.

    Returns:
        ``(result, bot_id, workspace_id)``. ``result`` is the transcription
        text on success, otherwise a message starting with ``"Failed"``,
        ``"Unable"`` or ``"The transcription is taking too long"``. The IDs
        are the ones the caller should keep using; both are ``None`` when
        recreation failed and nothing usable remains.
    """
    headers = {
        "User-Agent": "Mozilla/5.0",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
        "Content-Type": "application/json",
        "Cookie": AUTH_COOKIE,
    }
    payload = {
        "type": "openai:transcribeAudio",
        "input": {
            "fileUrl": file_url,
            "prompt": prompt,
            "temperature": 0
        }
    }

    botpress_url = "https://api.botpress.cloud/v1/chat/actions"
    max_retries = 3

    forbidden = False
    last_error = "Unable to transcribe the audio."

    for attempt in range(max_retries):
        delay = 2
        try:
            print(f"Attempt {attempt+1}: Sending transcription request to Botpress API with bot_id={bot_id}, workspace_id={workspace_id}")
            response = requests.post(
                botpress_url, json=payload, headers=headers, timeout=TRANSCRIBE_TIMEOUT
            )

            if response.status_code == 200:
                transcription_text = _extract_transcription(response.json())
                print("Successfully received transcription from Botpress API")
                return transcription_text, bot_id, workspace_id

            if response.status_code == 403:
                print("Received 403 error. Deleting and recreating workspace/bot...")
                forbidden = True
                break  # Leave the retry loop to handle 403 specially

            last_error = f"Unable to transcribe the audio (Error {response.status_code})."
            if response.status_code in RETRYABLE_STATUSES:
                print(f"Received error {response.status_code}. Retrying...")
                delay = 3
            else:
                print(f"Received unexpected error: {response.status_code}, {response.text}")
        except requests.exceptions.Timeout:
            print("Request timed out. Retrying...")
            last_error = "The transcription is taking too long. Please try again with a shorter audio file."
        except Exception as e:
            # Best-effort boundary: any failure of one attempt is retried.
            print(f"Error during request: {str(e)}")
            last_error = f"Unable to transcribe the audio: {str(e)}"

        # No point sleeping after the final attempt.
        if attempt < max_retries - 1:
            time.sleep(delay)

    # Only a 403 justifies destroying the bot and workspace; exhausted
    # retries on transient errors are just reported.
    if not forbidden:
        return last_error, bot_id, workspace_id

    new_bot_id, new_workspace_id, failed_step = _recreate_bot_and_workspace(
        bot_id, workspace_id
    )
    # The old resources were deleted, so on failure report "no IDs"
    # rather than handing back stale or mismatched ones.
    if failed_step == "workspace":
        return "Failed to create a new workspace. Please try again later.", None, None
    if failed_step == "bot":
        return "Failed to create a new bot. Please try again later.", None, None

    # Update headers with new bot ID and workspace ID
    headers["x-bot-id"] = new_bot_id
    headers["x-workspace-id"] = new_workspace_id

    # Try one more time with the new IDs
    try:
        print(f"Retrying with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
        retry_response = requests.post(
            botpress_url, json=payload, headers=headers, timeout=TRANSCRIBE_TIMEOUT
        )
        if retry_response.status_code == 200:
            transcription_text = _extract_transcription(retry_response.json())
            print("Successfully received transcription with new IDs")
            return transcription_text, new_bot_id, new_workspace_id

        print(f"Failed with new IDs: {retry_response.status_code}, {retry_response.text}")
        return "Unable to transcribe the audio with new credentials.", new_bot_id, new_workspace_id
    except Exception as e:
        print(f"Error with new IDs: {str(e)}")
        return f"Unable to transcribe the audio with new credentials: {str(e)}", new_bot_id, new_workspace_id


# -------------------------------------------------------------------
# Flask Endpoints
# -------------------------------------------------------------------
@app.route("/transcribe", methods=["POST"])
def transcribe_endpoint():
    """Transcribe the audio file at a given URL.

    Expects JSON with:
    {
        "file_url": "string",
        "prompt": "string"
    }
    Returns JSON with:
    {
        "transcription": "string"
    }
    or ``{"error": ...}`` with status 400 (bad request body) or 500.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID

    # silent=True yields None on unparsable bodies; anything that is not a
    # JSON object cannot carry the expected keys.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    file_url = data.get("file_url", "")
    prompt = data.get("prompt", "get all text with his lang and exatract (DON'T translate) .")

    if not file_url:
        return jsonify({"error": "Missing file_url parameter"}), 400

    # If we don't yet have a workspace or bot, create them
    if not _ensure_bot_and_workspace():
        return jsonify({"error": "I'm currently unavailable. Please try again later."}), 500

    # Call our function that interacts with Botpress API
    print(f"Sending transcription request with existing bot_id={GLOBAL_BOT_ID}, workspace_id={GLOBAL_WORKSPACE_ID}")
    transcription, updated_bot_id, updated_workspace_id = transcribe_audio(
        file_url,
        prompt,
        GLOBAL_BOT_ID,
        GLOBAL_WORKSPACE_ID
    )

    # Update global IDs if they changed
    if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
        print(f"Updating global IDs: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
        GLOBAL_BOT_ID = updated_bot_id
        GLOBAL_WORKSPACE_ID = updated_workspace_id

    # Check if we got an error string back.
    # NOTE(review): errors are recognised by message prefix, so a genuine
    # transcription that begins with one of these words is reported as an error.
    error_prefixes = ("Failed", "Unable", "The transcription is taking too long")
    if isinstance(transcription, str) and transcription.startswith(error_prefixes):
        return jsonify({"error": transcription}), 500

    return jsonify({"transcription": transcription})


@app.route("/upload", methods=["POST"])
def upload_endpoint():
    """Upload an audio file and return its URL.

    Expects form data with a file field named 'audio'.
    Returns JSON ``{"file_url": ...}``, or ``{"error": ...}`` with status
    400 (missing/unnamed file) or 500.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID

    # Check if file was uploaded
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']

    # The client controls the filename: keep only its final path component
    # so it can never point outside the temporary directory.
    raw_name = audio_file.filename or ''
    safe_name = os.path.basename(raw_name.replace("\\", "/"))
    if safe_name in ('', '.', '..') or "\x00" in safe_name:
        return jsonify({"error": "No audio file selected"}), 400

    # If we don't yet have a workspace or bot, create them
    if not _ensure_bot_and_workspace():
        return jsonify({"error": "I'm currently unavailable. Please try again later."}), 500

    # Save the file in a private temporary directory; the context manager
    # removes it even when the upload raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, safe_name)
        audio_file.save(temp_path)
        file_url, updated_bot_id, updated_workspace_id = upload_audio_file(
            temp_path, GLOBAL_BOT_ID, GLOBAL_WORKSPACE_ID
        )

    # Update global IDs if they changed
    if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
        print(f"Updating global IDs: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
        GLOBAL_BOT_ID = updated_bot_id
        GLOBAL_WORKSPACE_ID = updated_workspace_id

    # Check if we got an error string back
    if isinstance(file_url, str) and file_url.startswith(("Error", "Failed", "No upload")):
        return jsonify({"error": file_url}), 500

    return jsonify({"file_url": file_url})


# -------------------------------------------------------------------
# Run the Flask app
# -------------------------------------------------------------------
if __name__ == "__main__":
    # The interactive debugger must not be reachable on a public interface,
    # so debug mode is opt-in via FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)