# transcript / app.py
# Commit c96f280 (verified): "Update app.py" -- author: CORVO-AI
from flask import Flask, request, jsonify
import requests
import random
import string
import time
import os
import json
from datetime import datetime, timedelta
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Botpress workspace / bot IDs shared by every request this process handles.
# Both start out unset.
GLOBAL_WORKSPACE_ID = GLOBAL_BOT_ID = None
# Session cookie sent in the "Cookie" header of every Botpress request.
# It is a live credential, so it is read from the environment instead of being
# committed to the source file: export BOTPRESS_AUTH_COOKIE with the complete
# cookie header value of a logged-in Botpress session before starting the app.
# An unset variable yields an empty string, in which case the Botpress calls
# are sent without a usable session.
AUTH_COOKIE = os.environ.get("BOTPRESS_AUTH_COOKIE", "")
# -------------------------------------------------------------------
# Helper functions for random bot/workspace names
# -------------------------------------------------------------------
def generate_random_name(length=5):
    """Return ``length`` randomly chosen ASCII letters as one string.

    Used to produce throwaway names for new workspaces and bots.
    """
    alphabet = string.ascii_letters
    picked = random.choices(alphabet, k=length)
    return "".join(picked)
# -------------------------------------------------------------------
# Functions to create/delete workspaces and bots
# -------------------------------------------------------------------
def create_workspace():
    """Create a Botpress workspace with a random name.

    Returns:
        The new workspace ID, or None when the request fails, the API answers
        with a non-200 status, or the response body carries no ``id``.
    """
    ws_url = "https://api.botpress.cloud/v1/admin/workspaces"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Cookie": AUTH_COOKIE,
    }
    payload = {"name": generate_random_name()}
    try:
        # Bound the call: without a timeout a stalled connection would block
        # the calling request handler indefinitely.
        response = requests.post(ws_url, headers=headers, json=payload, timeout=30)
        if response.status_code != 200:
            print(f"Workspace creation failed with: {response.status_code}, {response.text}")
            return None
        workspace_id = response.json().get('id')
        if not workspace_id:
            # A 200 without an id is unusable; do not report it as a success.
            print("Workspace ID not found in the response.")
            return None
        print(f"Successfully created workspace: {workspace_id}")
        return workspace_id
    except Exception as e:
        # Best-effort helper: callers only test the result for falsiness, so
        # any failure (network error, timeout, malformed JSON) is logged here.
        print(f"Error creating workspace: {str(e)}")
        return None
def create_bot(workspace_id):
    """Create a randomly named bot in ``workspace_id`` and install its integration.

    Args:
        workspace_id: ID of the workspace that will own the bot.

    Returns:
        The new bot ID, or None when ``workspace_id`` is empty, the request
        fails, the API answers with a non-200 status, or the response has no
        bot ID. A failed integration install does not prevent the bot ID from
        being returned.
    """
    if not workspace_id:
        print("Cannot create bot: No workspace ID provided")
        return None
    bot_url = "https://api.botpress.cloud/v1/admin/bots"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "x-workspace-id": workspace_id,
        "Cookie": AUTH_COOKIE,
        "Content-Type": "application/json",
    }
    payload = {"name": generate_random_name()}
    try:
        # Bound the call so a stalled connection cannot block the caller forever.
        response = requests.post(bot_url, headers=headers, json=payload, timeout=30)
        if response.status_code != 200:
            print(f"Bot creation failed with: {response.status_code}, {response.text}")
            return None
        bot_id = response.json().get("bot", {}).get("id")
        if not bot_id:
            print("Bot ID not found in the response.")
            return None
        print(f"Successfully created bot: {bot_id} in workspace: {workspace_id}")
        # Installing the integration is best effort: the bot ID is returned
        # either way. install_bot_integration logs its own success message.
        if not install_bot_integration(bot_id, workspace_id):
            print(f"Failed to install integration for bot {bot_id}")
        return bot_id
    except Exception as e:
        # Callers only check for None, so every failure is logged and swallowed.
        print(f"Error creating bot: {str(e)}")
        return None
def install_bot_integration(bot_id, workspace_id, integration_id="intver_01JSERFV00FYHW4SM6TEKZ9RWS"):
    """Enable an integration on a bot via a PUT to the bot's admin endpoint.

    Args:
        bot_id: ID of the bot to update.
        workspace_id: ID of the workspace that owns the bot.
        integration_id: Integration version ID to enable. Defaults to the ID
            this module has always installed.

    Returns:
        True when the API answers 200, False on missing IDs, any other status,
        or a request error.
    """
    if not bot_id or not workspace_id:
        print("Cannot install integration: Missing bot ID or workspace ID")
        return False
    url = f"https://api.botpress.cloud/v1/admin/bots/{bot_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Cookie": AUTH_COOKIE,
        "Content-Type": "application/json",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
    }
    payload = {
        "integrations": {
            integration_id: {
                "enabled": True
            }
        }
    }
    try:
        # Bound the call so a stalled connection cannot block the caller forever.
        response = requests.put(url, headers=headers, json=payload, timeout=30)
        if response.status_code == 200:
            print(f"Successfully installed integration for bot {bot_id}")
            return True
        print(f"Failed to install integration: {response.status_code}, {response.text}")
        return False
    except Exception as e:
        # Best-effort helper: report failure through the return value.
        print(f"Error installing integration: {str(e)}")
        return False
def delete_bot(bot_id, workspace_id):
    """Delete a bot through the Botpress admin API.

    Args:
        bot_id: ID of the bot to delete.
        workspace_id: ID of the workspace that owns the bot.

    Returns:
        True when the API answers 200 or 204, False on missing IDs, any other
        status, or a request error.
    """
    if not bot_id or not workspace_id:
        print("Cannot delete bot: Missing bot ID or workspace ID")
        return False
    url = f"https://api.botpress.cloud/v1/admin/bots/{bot_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "x-workspace-id": workspace_id,
        "Cookie": AUTH_COOKIE,
    }
    try:
        # Bound the call so a stalled connection cannot block the caller forever.
        response = requests.delete(url, headers=headers, timeout=30)
        if response.status_code in (200, 204):
            print(f"Successfully deleted bot: {bot_id}")
            return True
        print(f"Failed to delete bot: {response.status_code}, {response.text}")
        return False
    except Exception as e:
        # Best-effort helper: report failure through the return value.
        print(f"Error deleting bot: {str(e)}")
        return False
def delete_workspace(workspace_id):
    """Delete a workspace through the Botpress admin API.

    Args:
        workspace_id: ID of the workspace to delete.

    Returns:
        True when the API answers 200 or 204, False on a missing ID, any other
        status, or a request error.
    """
    if not workspace_id:
        print("Cannot delete workspace: No workspace ID provided")
        return False
    url = f"https://api.botpress.cloud/v1/admin/workspaces/{workspace_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Cookie": AUTH_COOKIE,
    }
    try:
        # Bound the call so a stalled connection cannot block the caller forever.
        response = requests.delete(url, headers=headers, timeout=30)
        if response.status_code in (200, 204):
            print(f"Successfully deleted workspace: {workspace_id}")
            return True
        print(f"Failed to delete workspace: {response.status_code}, {response.text}")
        return False
    except Exception as e:
        # Best-effort helper: report failure through the return value.
        print(f"Error deleting workspace: {str(e)}")
        return False
# -------------------------------------------------------------------
# Function to upload audio file and get URL
# -------------------------------------------------------------------
def upload_audio_file(file_path, bot_id, workspace_id, _allow_recreate=True):
    """Upload a local audio file to Botpress file storage and return its URL.

    The upload is a two-step exchange: a PUT to the Botpress files endpoint
    registers the file and returns an ``uploadUrl``; the file's bytes are then
    PUT to that URL.

    When the files endpoint answers 403, the current bot and workspace are
    deleted, a fresh pair is created, the module-level IDs are updated and the
    upload is attempted exactly once more with the new pair.

    Args:
        file_path: Path of the audio file on local disk.
        bot_id: Bot ID sent in the ``x-bot-id`` header.
        workspace_id: Workspace ID sent in the ``x-workspace-id`` header.
        _allow_recreate: Internal guard. False on the retry that follows a
            403 recovery, so a second 403 is reported as an error instead of
            triggering another delete/recreate cycle.

    Returns:
        A ``(result, bot_id, workspace_id)`` tuple. ``result`` is the file URL
        on success, otherwise a message starting with "Error", "Failed" or
        "No upload". The IDs are the ones the caller should keep using;
        ``bot_id`` is None when a new workspace was created but no bot could
        be created in it.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
    # Local import: the module header only imports datetime and timedelta.
    from datetime import timezone

    url = "https://api.botpress.cloud/v1/files"
    file_name = os.path.basename(file_path)
    file_size = os.path.getsize(file_path)
    content_type = "audio/mpeg"
    # Expire three days from now. A timezone-aware UTC timestamp keeps the
    # offset in the string truthful regardless of the server's local timezone,
    # e.g. "2025-06-15T10:00:00.123+00:00".
    expires_at = (datetime.now(timezone.utc) + timedelta(days=3)).isoformat(timespec="milliseconds")

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
        "Content-Type": "application/json",
        "cookie": AUTH_COOKIE
    }
    payload = {
        "key": file_name,
        "tags": {
            "purpose": "emulator",
            "system": "true"
        },
        "size": file_size,
        "accessPolicies": ["public_content"],
        "contentType": content_type,
        "expiresAt": expires_at,
        "publicContentImmediatelyAccessible": True
    }

    # Step 1: register the file and obtain the upload URL.
    try:
        response = requests.put(url, headers=headers, data=json.dumps(payload), timeout=30)
    except requests.RequestException as e:
        return f"Error: {str(e)}", bot_id, workspace_id

    if response.status_code == 200:
        file_info = response.json().get("file", {})
        file_url = file_info.get("url", "")
        upload_url = file_info.get("uploadUrl", "")
        if not upload_url:
            return "No upload URL provided in response", bot_id, workspace_id

        with open(file_path, 'rb') as audio_file:
            file_content = audio_file.read()

        # Browser-like headers for the storage upload.
        # NOTE(review): "host" is pinned to the us-east-1 S3 endpoint; confirm
        # that uploadUrl always points at that host.
        upload_headers = {
            "accept": "application/json, text/plain, */*",
            "accept-encoding": "gzip, deflate, br, zstd",
            "accept-language": "en-US,en;q=0.9,ar;q=0.8",
            "connection": "keep-alive",
            "content-length": str(file_size),
            "content-type": "audio/mpeg",
            "host": "s3.us-east-1.amazonaws.com",
            "origin": "https://studio.botpress.cloud",
            "referer": "https://studio.botpress.cloud/",
            "sec-ch-ua": '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "cross-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
            "x-amz-tagging": "public=true"
        }

        # Step 2: send the bytes to the upload URL.
        try:
            upload_response = requests.put(upload_url, headers=upload_headers, data=file_content, timeout=120)
        except requests.RequestException as e:
            return f"Error uploading file: {str(e)}", bot_id, workspace_id
        if upload_response.status_code == 200:
            return file_url, bot_id, workspace_id
        return f"Error uploading file: {upload_response.status_code} - {upload_response.text}", bot_id, workspace_id

    if response.status_code == 403 and _allow_recreate:
        print("Received 403 error during file upload. Recreating bot and workspace...")
        # Delete bot first, then workspace.
        if bot_id and workspace_id:
            delete_bot(bot_id, workspace_id)
            delete_workspace(workspace_id)
        new_workspace_id = create_workspace()
        if not new_workspace_id:
            return "Failed to create a new workspace after 403 error", bot_id, workspace_id
        new_bot_id = create_bot(new_workspace_id)
        if not new_bot_id:
            # Keep the tuple positions honest: there is no bot, and the
            # workspace the caller should track is the newly created one.
            return "Failed to create a new bot after 403 error", None, new_workspace_id
        GLOBAL_WORKSPACE_ID = new_workspace_id
        GLOBAL_BOT_ID = new_bot_id
        # Retry once with the new IDs; recreation is disabled on this attempt
        # so a persistent 403 cannot loop forever.
        result, _, _ = upload_audio_file(file_path, new_bot_id, new_workspace_id, _allow_recreate=False)
        return result, new_bot_id, new_workspace_id

    return f"Error: {response.status_code} - {response.text}", bot_id, workspace_id
# -------------------------------------------------------------------
# Main function that calls the Botpress API endpoint for audio transcription
# -------------------------------------------------------------------
def _join_segment_texts(data):
    """Join the non-empty ``text`` fields of ``data['output']['segments']`` with spaces."""
    segments = data.get('output', {}).get('segments', [])
    texts = (segment.get('text', '') for segment in segments)
    return " ".join(text for text in texts if text).strip()


def transcribe_audio(file_url, prompt, bot_id, workspace_id):
    """Request a transcription of ``file_url`` through the Botpress actions endpoint.

    Up to three attempts are made with the given IDs. Only a 403 response
    triggers recovery: the bot and workspace are deleted, a new pair is
    created, the module-level IDs are updated and one final attempt is made
    with the new pair. Every other failure is reported once the attempts are
    used up, leaving the bot and workspace untouched.

    Args:
        file_url: URL of the audio file to transcribe.
        prompt: Prompt forwarded in the action input.
        bot_id: Bot ID sent in the ``x-bot-id`` header.
        workspace_id: Workspace ID sent in the ``x-workspace-id`` header.

    Returns:
        A ``(result, bot_id, workspace_id)`` tuple. ``result`` is the joined
        transcription text on success, otherwise a message starting with
        "Unable", "Failed" or "The transcription is taking too long". The IDs
        are the ones the caller should keep using; ``bot_id`` is None when a
        new workspace was created but no bot could be created in it.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
    headers = {
        "User-Agent": "Mozilla/5.0",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
        "Content-Type": "application/json",
        "Cookie": AUTH_COOKIE
    }
    payload = {
        "type": "openai:transcribeAudio",
        "input": {
            "fileUrl": file_url,
            "prompt": prompt,
            "temperature": 0
        }
    }
    botpress_url = "https://api.botpress.cloud/v1/chat/actions"
    max_retries = 3
    timeout = 120  # Generous timeout for long audio files
    retryable_statuses = (404, 408, 502, 503, 504)

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            print(f"Attempt {attempt+1}: Sending transcription request to Botpress API with bot_id={bot_id}, workspace_id={workspace_id}")
            response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
            status = response.status_code

            if status == 200:
                transcription_text = _join_segment_texts(response.json())
                print("Successfully received transcription from Botpress API")
                return transcription_text, bot_id, workspace_id

            if status == 403:
                print("Received 403 error. Deleting and recreating workspace/bot...")
                break  # Handled below, outside the retry loop

            if status in retryable_statuses:
                print(f"Received error {status}. Retrying...")
                if is_last_attempt:
                    # Out of attempts: report the error instead of falling
                    # through into the 403 recovery path.
                    return f"Unable to transcribe the audio (Error {status}).", bot_id, workspace_id
                time.sleep(3)
                continue

            print(f"Received unexpected error: {status}, {response.text}")
            if is_last_attempt:
                return f"Unable to transcribe the audio (Error {status}).", bot_id, workspace_id
            time.sleep(2)
        except requests.exceptions.Timeout:
            print("Request timed out. Retrying...")
            if is_last_attempt:
                return "The transcription is taking too long. Please try again with a shorter audio file.", bot_id, workspace_id
            time.sleep(2)
        except Exception as e:
            print(f"Error during request: {str(e)}")
            if is_last_attempt:
                return f"Unable to transcribe the audio: {str(e)}", bot_id, workspace_id
            time.sleep(2)

    # The loop is only left through the 403 ``break``; every other outcome has
    # already returned. Delete the bot first, then the workspace.
    if bot_id and workspace_id:
        print(f"Deleting bot {bot_id} first...")
        if delete_bot(bot_id, workspace_id):
            print(f"Successfully deleted bot {bot_id}")
        else:
            print(f"Failed to delete bot {bot_id}")
        print(f"Now deleting workspace {workspace_id}...")
        if delete_workspace(workspace_id):
            print(f"Successfully deleted workspace {workspace_id}")
        else:
            print(f"Failed to delete workspace {workspace_id}")

    new_workspace_id = create_workspace()
    if not new_workspace_id:
        return "Failed to create a new workspace. Please try again later.", bot_id, workspace_id

    new_bot_id = create_bot(new_workspace_id)
    if not new_bot_id:
        # Keep the tuple positions honest: there is no bot, and the workspace
        # the caller should track is the newly created one.
        return "Failed to create a new bot. Please try again later.", None, new_workspace_id

    GLOBAL_WORKSPACE_ID = new_workspace_id
    GLOBAL_BOT_ID = new_bot_id
    headers["x-bot-id"] = new_bot_id
    headers["x-workspace-id"] = new_workspace_id

    # One final attempt with the new IDs.
    try:
        print(f"Retrying with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
        retry_response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
        if retry_response.status_code == 200:
            transcription_text = _join_segment_texts(retry_response.json())
            print("Successfully received transcription with new IDs")
            return transcription_text, new_bot_id, new_workspace_id
        print(f"Failed with new IDs: {retry_response.status_code}, {retry_response.text}")
        return "Unable to transcribe the audio with new credentials.", new_bot_id, new_workspace_id
    except Exception as e:
        print(f"Error with new IDs: {str(e)}")
        return f"Unable to transcribe the audio with new credentials: {str(e)}", new_bot_id, new_workspace_id
# -------------------------------------------------------------------
# Flask Endpoints
# -------------------------------------------------------------------
@app.route("/transcribe", methods=["POST"])
def transcribe_endpoint():
    """Transcribe the audio file at a given URL.

    Request body (JSON object):
        file_url: URL of the audio file (required).
        prompt: Optional prompt forwarded to the transcription action.

    Responses:
        200 ``{"transcription": "..."}`` on success.
        400 ``{"error": "..."}`` when the body is not a JSON object or
            ``file_url`` is missing.
        500 ``{"error": "..."}`` when no workspace/bot is available or the
            transcription fails.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID

    data = request.get_json(force=True)
    # Valid JSON that is not an object (null, a list, a number) has no .get().
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    file_url = data.get("file_url", "")
    prompt = data.get("prompt", "get all text with his lang and exatract (DON'T translate) .")
    if not file_url:
        return jsonify({"error": "Missing file_url parameter"}), 400

    # Make sure a workspace and a bot exist before calling Botpress.
    if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
        print("No existing IDs found. Creating new workspace and bot...")
        if GLOBAL_WORKSPACE_ID:
            # Only the bot is missing: create it in the workspace we already
            # have instead of abandoning that workspace for a new one.
            GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID)
        else:
            GLOBAL_WORKSPACE_ID = create_workspace()
            GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID) if GLOBAL_WORKSPACE_ID else None
        if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
            return jsonify({"error": "I'm currently unavailable. Please try again later."}), 500

    print(f"Sending transcription request with existing bot_id={GLOBAL_BOT_ID}, workspace_id={GLOBAL_WORKSPACE_ID}")
    transcription, updated_bot_id, updated_workspace_id = transcribe_audio(
        file_url,
        prompt,
        GLOBAL_BOT_ID,
        GLOBAL_WORKSPACE_ID
    )

    # Adopt whatever IDs the call says are current (they change after a 403 recovery).
    if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
        print(f"Updating global IDs: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
        GLOBAL_BOT_ID = updated_bot_id
        GLOBAL_WORKSPACE_ID = updated_workspace_id

    # transcribe_audio signals failure through the message prefix.
    # NOTE(review): a genuine transcription that begins with one of these
    # prefixes would also be reported as an error.
    error_prefixes = ("Failed", "Unable", "The transcription is taking too long")
    if isinstance(transcription, str) and transcription.startswith(error_prefixes):
        return jsonify({"error": transcription}), 500
    return jsonify({"transcription": transcription})
@app.route("/upload", methods=["POST"])
def upload_endpoint():
    """Upload an audio file to Botpress storage and return its URL.

    Request: multipart form data with a file field named ``audio``.

    Responses:
        200 ``{"file_url": "..."}`` on success.
        400 ``{"error": "..."}`` when no file, or an unusable file name, is sent.
        500 ``{"error": "..."}`` when no workspace/bot is available or the
            upload fails.
    """
    global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
    # Local import: tempfile is not among the module-level imports.
    import tempfile

    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    audio_file = request.files['audio']
    if not audio_file.filename:
        return jsonify({"error": "No audio file selected"}), 400

    # The file name comes from the client: keep only its last path component
    # so it cannot point outside the temporary directory.
    safe_name = os.path.basename(audio_file.filename)
    if safe_name in ("", ".", ".."):
        return jsonify({"error": "Invalid audio file name"}), 400

    # Make sure a workspace and a bot exist before calling Botpress.
    if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
        print("No existing IDs found. Creating new workspace and bot...")
        if GLOBAL_WORKSPACE_ID:
            # Only the bot is missing: create it in the workspace we already
            # have instead of abandoning that workspace for a new one.
            GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID)
        else:
            GLOBAL_WORKSPACE_ID = create_workspace()
            GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID) if GLOBAL_WORKSPACE_ID else None
        if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
            return jsonify({"error": "I'm currently unavailable. Please try again later."}), 500

    # A private per-request directory avoids collisions between concurrent
    # uploads of the same name and is removed even if the upload raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, safe_name)
        audio_file.save(temp_path)
        file_url, updated_bot_id, updated_workspace_id = upload_audio_file(
            temp_path, GLOBAL_BOT_ID, GLOBAL_WORKSPACE_ID
        )

    # Adopt whatever IDs the call says are current (they change after a 403 recovery).
    if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
        print(f"Updating global IDs: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
        GLOBAL_BOT_ID = updated_bot_id
        GLOBAL_WORKSPACE_ID = updated_workspace_id

    # upload_audio_file signals failure through the message prefix.
    if isinstance(file_url, str) and file_url.startswith(("Error", "Failed", "No upload")):
        return jsonify({"error": file_url}), 500
    return jsonify({"file_url": file_url})
# -------------------------------------------------------------------
# Run the Flask app
# -------------------------------------------------------------------
if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server execute
    # code, so it must not be on by default while listening on all
    # interfaces. Opt in with FLASK_DEBUG=1 for local development only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)