BACK-END

Sleeping

App Files Files Community

CORVO-AI committed on May 18, 2025

Commit

485895c

verified ·

1 Parent(s): f129942

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -85

app.py CHANGED Viewed

@@ -5,13 +5,13 @@ import string
 import time
 app = Flask(__name__)
 # Global variables to store workspace and bot IDs
 GLOBAL_WORKSPACE_ID = None
 GLOBAL_BOT_ID = None
-# cookie value
-AUTH_COOKIE = "pscd=try.botpress.com; _hjSessionUser_2931810=eyJpZCI6ImQ2MGMzYjhkLTlkMjQtNTA0OS1hMzlmLWEzNmI0NzA0NzUxNCIsImNyZWF0ZWQiOjE3MzU3MTg0MDcwNTAsImV4aXN0aW5nIjp0cnVlfQ==; hubspotutk=75739411a4d011b2164c4f3d944ecb94; intercom-device-id-bjzkw2xf=afd0a36b-b229-44e3-828e-60483c80c10c; _hjSessionUser_3339867=eyJpZCI6IjU4ODlmMTY4LWRkNGEtNTJhZS1hZTUzLWZlYWQwM2ZmMTVjNyIsImNyZWF0ZWQiOjE3MzU3MTg1ODM4MDgsImV4aXN0aW5nIjp0cnVlfQ==; __hstc=59821234.75739411a4d011b2164c4f3d944ecb94.1735718442141.1746194848836.1746538539919.88; mp_1195923e954ce61d822842b5832047cd_mixpanel=%7B%22distinct_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%2C%22%24device_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%2C%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fapp.botpress.cloud%2F%22%2C%22%24initial_referring_domain%22%3A%20%22app.botpress.cloud%22%2C%22__mps%22%3A%20%7B%7D%2C%22__mpso%22%3A%20%7B%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fapp.botpress.cloud%2F%22%2C%22%24initial_referring_domain%22%3A%20%22app.botpress.cloud%22%7D%2C%22__mpus%22%3A%20%7B%7D%2C%22__mpa%22%3A%20%7B%7D%2C%22__mpu%22%3A%20%7B%7D%2C%22__mpr%22%3A%20%5B%5D%2C%22__mpap%22%3A%20%5B%5D%2C%22%24user_id%22%3A%20%22d403ad7b-ea73-4d29-b977-5fd95afd585c%22%7D; intercom-session-bjzkw2xf=TTdnZGNWUC9xNXMreE80NXhRZFNZS0pyUEdsbkJRc2JMcXdGZmcveVRPYkxZTmVnVnhqMUhJTWlDcEpVcWljeDZVYVVSblN4YnV5S0xBdWxDd2swQjZiaUZTeWl5M1psRmtoUWJwUU9FSFE9LS1Bay9zNldJTmVhUFdwMFNReFRmcXB3PT0=--68abc5394d7aab99748f3e451637cab5d9152a4c; _ga=GA1.2.1726154447.1735718383; _gid=GA1.2.1619749406.1746821934; _gat_UA-226900660-1=1; _gat_UA-226900660-2=1; _ga_W6YT9YSNLH=GS2.2.s1746821948$o89$g0$t1746821948$j0$l0$h0; _ga_CYSS87Q508=GS2.2.s1746821949$o89$g0$t1746821949$j0$l0$h0; _hjSession_2931810=eyJpZCI6IjA1YTlkMzY3LWNiZWEtNGQ3OC04YzNiLTEzNTFjNjkxYzViZSIsImMiOjE3NDY4MjE5NTU2NjYsInMiOjAsInIiOjAsInNiIjowLCJzciI6MCwic2UiOjAsImZzIjowLCJzcCI6MH0=; _ga_PCC6TBWJY6=GS2.1.s1746821926$o120$g1$t1746821960$j0$l0$h0; 
_ga_HKHSWES9V9=GS2.1.s1746821929$o120$g1$t1746821960$j29$l0$h193244206; csrf_token_bd9ac21c34b9f0915e733c3e5305d737d0722c1168be7376b889426b5ec2a298=pIDxu4npODUGEpo7JHVQrKZ4GFaa3U+3BpgxaV5hcVw=; ory_kratos_session=MTc0NjgyMjAyOHxfUmdMaVlTQXVfSmxlT1lJSGpyU2FhbjVUTHg0R0ZsQWgtVm00M3pHcXZwVG9yNW1qRDJheUFGaFZvNmFEUVdBOThQR014RjJJbmhUMmhIV1I1ME5UVHZkTDNpMUMtQlRjZ1ZTbE55M19Pb2dHTF9vQlJoSGlBQnRRWUp0M1ZUdnVvcENLeVhOTllWNk1zMk11bFVPOWFrTzJMTTdxMmVteUozVVRDMWE5TVIxbDgzU3dUY2VQaDBRWDN4bDJUVm8yUkZQa19sb09GbzlFZHF2MDFQcVR6bVVWVVpDLXVoQ1lXMEh2LV9Sd2VNZXM1cjM4TGZPVTJqdW5xNTBETTBDYkppU0xNU2xicUk3Z2EyMnFkVmdyQT09fEYiv7pXcfXVnpIFi4JLGgDObQAchyJCoAwGDSkFkoX7; ajs_user_id=d403ad7b-ea73-4d29-b977-5fd95afd585c; ajs_anonymous_id=cda6139d-cb82-4906-bfac-adaea115b097"
 # -------------------------------------------------------------------
 # Helper functions for random bot/workspace names
@@ -177,73 +177,83 @@ def delete_workspace(workspace_id):
 # -------------------------------------------------------------------
-# Function to transcribe audio
 # -------------------------------------------------------------------
-def transcribe_audio(file_url, language="", prompt="get all text from this Audio", temperature=0, bot_id=None, workspace_id=None):
     """
-    Sends an audio file URL to the Botpress API for transcription
-    Uses bot/workspace management for authentication
     """
     # Prepare the headers
     headers = {
         "User-Agent": "Mozilla/5.0",
-        "Content-Type": "application/json",
         "x-bot-id": bot_id,
         "Cookie": AUTH_COOKIE
     }
-    # Add bot ID to headers if available
-    if bot_id:
-        headers["x-bot-id"] = bot_id
-    # Prepare the payload for audio transcription
     payload = {
         "type": "openai:transcribeAudio",
         "input": {
             "fileUrl": file_url,
-            "language": language,
             "prompt": prompt,
-            "temperature": temperature
         }
     }
     botpress_url = "https://api.botpress.cloud/v1/chat/actions"
     max_retries = 3
-    timeout = 180  # Longer timeout for audio processing
-    # Flag to track if we need to create new IDs
-    need_new_ids = False
     # Attempt to send the request
     for attempt in range(max_retries):
         try:
-            print(f"Attempt {attempt+1}: Sending transcription request to Botpress API")
-            if bot_id:
-                print(f"Using bot_id={bot_id}, workspace_id={workspace_id}")
             response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
             # If successful (200)
             if response.status_code == 200:
                 data = response.json()
-                # Extract the transcription text from all segments
                 segments = data.get('output', {}).get('segments', [])
-                transcription = " ".join([segment.get('text', '').strip() for segment in segments])
                 print(f"Successfully received transcription from Botpress API")
-                return transcription, data, bot_id, workspace_id
-            # Handle 403/404 errors (authentication/authorization issue)
-            elif response.status_code in [403, 404]:
-                print(f"Received {response.status_code} error. Need to create new IDs.")
-                need_new_ids = True
-                break
             # Handle network errors or timeouts (just retry)
-            elif response.status_code in [443, 408, 502, 503, 504]:
                 print(f"Received error {response.status_code}. Retrying...")
-                time.sleep(3)
                 continue
             # Any other error status code
@@ -253,70 +263,91 @@ def transcribe_audio(file_url, language="", prompt="get all text from this Audio
                     time.sleep(2)
                     continue
                 else:
-                    return f"Unable to transcribe audio (Error {response.status_code}).", None, bot_id, workspace_id
         except requests.exceptions.Timeout:
-            print(f"Transcription request timed out. Retrying...")
             if attempt < max_retries - 1:
-                time.sleep(3)
                 continue
             else:
-                return "The transcription is taking too long. Please try with a shorter audio file.", None, bot_id, workspace_id
         except Exception as e:
-            print(f"Error during transcription request: {str(e)}")
             if attempt < max_retries - 1:
-                time.sleep(3)
                 continue
             else:
-                return f"Unable to transcribe audio: {str(e)}", None, bot_id, workspace_id
-    # If we need new IDs, create them and try again
-    if need_new_ids:
-        print("Creating new workspace and bot IDs for transcription...")
-        # First, try to clean up old resources
         if bot_id and workspace_id:
-            delete_bot(bot_id, workspace_id)
-            delete_workspace(workspace_id)
-        # Create new resources
         new_workspace_id = create_workspace()
         if not new_workspace_id:
-            return "Failed to create a new workspace for transcription. Please try again later.", None, bot_id, workspace_id
         new_bot_id = create_bot(new_workspace_id)
         if not new_bot_id:
-            return "Failed to create a new bot for transcription. Please try again later.", None, new_workspace_id, workspace_id
-        # Update headers with new bot ID
         headers["x-bot-id"] = new_bot_id
         # Try one more time with the new IDs
         try:
-            print(f"Retrying transcription with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
             retry_response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
             if retry_response.status_code == 200:
                 data = retry_response.json()
                 segments = data.get('output', {}).get('segments', [])
-                transcription = " ".join([segment.get('text', '').strip() for segment in segments])
                 print(f"Successfully received transcription with new IDs")
-                return transcription, data, new_bot_id, new_workspace_id
             else:
-                print(f"Transcription failed with new IDs: {retry_response.status_code}, {retry_response.text}")
-                return f"Unable to transcribe audio with new credentials.", None, new_bot_id, new_workspace_id
         except Exception as e:
-            print(f"Error with new IDs during transcription: {str(e)}")
-            return f"Unable to transcribe audio with new credentials: {str(e)}", None, new_bot_id, new_workspace_id
     # Fallback: reached when every attempt ended in a retryable status code (that branch always `continue`s)
-    return "Unable to transcribe audio.", None, bot_id, workspace_id
 # -------------------------------------------------------------------
-# Flask Endpoint for Transcription
 # -------------------------------------------------------------------
 @app.route("/transcribe", methods=["POST"])
 def transcribe_endpoint():
@@ -324,15 +355,11 @@ def transcribe_endpoint():
     Expects JSON with:
     {
       "file_url": "string",
-      "language": "string" (optional),
-      "prompt": "string" (optional),
-      "temperature": float (optional),
-      "include_full_response": boolean (optional)
     }
     Returns JSON with:
     {
-      "transcription": "string",
-      "full_response": {} (optional)
     }
     """
     global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
@@ -340,51 +367,38 @@ def transcribe_endpoint():
     # Parse JSON from request
     data = request.get_json(force=True)
     file_url = data.get("file_url", "")
-    language = data.get("language", "")
-    prompt = data.get("prompt", "get all text from this Audio")
-    temperature = data.get("temperature", 0)
-    include_full_response = data.get("include_full_response", False)
-    # Validate input
     if not file_url:
         return jsonify({"error": "Missing file_url parameter"}), 400
     # If we don't yet have a workspace or bot, create them
     if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
-        print("No existing IDs found for transcription. Creating new workspace and bot...")
         GLOBAL_WORKSPACE_ID = create_workspace()
         if GLOBAL_WORKSPACE_ID:
             GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID)
         # If creation failed
         if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
-            return jsonify({"transcription": "Transcription service is currently unavailable. Please try again later."}), 500
-    # Call our function that interacts with Botpress API for transcription
     print(f"Sending transcription request with existing bot_id={GLOBAL_BOT_ID}, workspace_id={GLOBAL_WORKSPACE_ID}")
-    transcription, full_data, updated_bot_id, updated_workspace_id = transcribe_audio(
         file_url,
-        language,
         prompt,
-        temperature,
         GLOBAL_BOT_ID,
         GLOBAL_WORKSPACE_ID
     )
     # Update global IDs if they changed
     if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
-        print(f"Updating global IDs after transcription: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
         GLOBAL_BOT_ID = updated_bot_id
         GLOBAL_WORKSPACE_ID = updated_workspace_id
-    # Prepare the response
-    response = {"transcription": transcription}
-    # Include full response data if requested
-    if include_full_response and full_data:
-        response["full_response"] = full_data
-    return jsonify(response)
 # -------------------------------------------------------------------

 import time
 app = Flask(__name__)
 # Global variables to store workspace and bot IDs
 GLOBAL_WORKSPACE_ID = None
 GLOBAL_BOT_ID = None
+# Cookie value used in requests (should be updated with a valid cookie)
+AUTH_COOKIE = ""
 # -------------------------------------------------------------------
 # Helper functions for random bot/workspace names
 # -------------------------------------------------------------------
+# Main function that calls the Botpress API endpoint for audio transcription
 # -------------------------------------------------------------------
+def transcribe_audio(file_url, prompt, bot_id, workspace_id):
     """
+    Sends the audio file URL to the Botpress API endpoint for transcription,
+    returns the transcription text and (possibly updated) bot/workspace IDs.
     """
     # Prepare the headers
     headers = {
         "User-Agent": "Mozilla/5.0",
         "x-bot-id": bot_id,
+        "x-workspace-id": workspace_id,
+        "Content-Type": "application/json",
         "Cookie": AUTH_COOKIE
     }
+    # Prepare the payload for the API
     payload = {
         "type": "openai:transcribeAudio",
         "input": {
             "fileUrl": file_url,
             "prompt": prompt,
+            "temperature": 0
         }
     }
     botpress_url = "https://api.botpress.cloud/v1/chat/actions"
     max_retries = 3
+    timeout = 120  # Increased timeout for long audio files
+    # Flag to track if we need to create new IDs due to quota exceeded
+    quota_exceeded = False
     # Attempt to send the request
     for attempt in range(max_retries):
         try:
+            print(f"Attempt {attempt+1}: Sending transcription request to Botpress API with bot_id={bot_id}, workspace_id={workspace_id}")
             response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
             # If successful (200)
             if response.status_code == 200:
                 data = response.json()
+                # Extract all text segments from the response
+                transcription_text = ""
                 segments = data.get('output', {}).get('segments', [])
+                for segment in segments:
+                    segment_text = segment.get('text', '')
+                    if segment_text:
+                        transcription_text += segment_text + " "
+                transcription_text = transcription_text.strip()
                 print(f"Successfully received transcription from Botpress API")
+                return transcription_text, bot_id, workspace_id
+            # Check for quota exceeded error specifically
+            elif response.status_code == 403:
+                error_data = response.json()
+                error_message = error_data.get('message', '')
+                # Check if this is the specific quota exceeded error
+                if "has reached its usage limit for ai spend" in error_message:
+                    print(f"Quota exceeded error detected: {error_message}")
+                    quota_exceeded = True
+                    break
+                else:
+                    print(f"Received 403 error but not quota exceeded: {error_message}")
+                    if attempt < max_retries - 1:
+                        time.sleep(2)
+                        continue
+                    else:
+                        return f"Unable to transcribe the audio (Error 403).", bot_id, workspace_id
             # Handle network errors or timeouts (just retry)
+            elif response.status_code in [404, 408, 502, 503, 504]:
                 print(f"Received error {response.status_code}. Retrying...")
+                time.sleep(3)  # Wait before retrying
                 continue
             # Any other error status code
                     time.sleep(2)
                     continue
                 else:
+                    return f"Unable to transcribe the audio (Error {response.status_code}).", bot_id, workspace_id
         except requests.exceptions.Timeout:
+            print(f"Request timed out. Retrying...")
             if attempt < max_retries - 1:
+                time.sleep(2)
                 continue
             else:
+                return "The transcription is taking too long. Please try again with a shorter audio file.", bot_id, workspace_id
         except Exception as e:
+            print(f"Error during request: {str(e)}")
             if attempt < max_retries - 1:
+                time.sleep(2)
                 continue
             else:
+                return f"Unable to transcribe the audio: {str(e)}", bot_id, workspace_id
+    # If quota exceeded, we need to create new resources
+    if quota_exceeded:
+        print("Quota exceeded. Creating new workspace and bot...")
+        # First delete the bot, then the workspace (in that order)
         if bot_id and workspace_id:
+            print(f"Deleting bot {bot_id} first...")
+            delete_success = delete_bot(bot_id, workspace_id)
+            if delete_success:
+                print(f"Successfully deleted bot {bot_id}")
+            else:
+                print(f"Failed to delete bot {bot_id}")
+            print(f"Now deleting workspace {workspace_id}...")
+            ws_delete_success = delete_workspace(workspace_id)
+            if ws_delete_success:
+                print(f"Successfully deleted workspace {workspace_id}")
+            else:
+                print(f"Failed to delete workspace {workspace_id}")
+        # Create new workspace
         new_workspace_id = create_workspace()
         if not new_workspace_id:
+            return "Failed to create a new workspace after quota exceeded. Please try again later.", bot_id, workspace_id
+        # Create new bot in the new workspace
         new_bot_id = create_bot(new_workspace_id)
         if not new_bot_id:
+            return "Failed to create a new bot after quota exceeded. Please try again later.", new_workspace_id, workspace_id
+        # Update headers with new bot ID and workspace ID
         headers["x-bot-id"] = new_bot_id
+        headers["x-workspace-id"] = new_workspace_id
         # Try one more time with the new IDs
         try:
+            print(f"Retrying with new bot_id={new_bot_id}, workspace_id={new_workspace_id}")
             retry_response = requests.post(botpress_url, json=payload, headers=headers, timeout=timeout)
             if retry_response.status_code == 200:
                 data = retry_response.json()
+                # Extract all text segments from the response
+                transcription_text = ""
                 segments = data.get('output', {}).get('segments', [])
+                for segment in segments:
+                    segment_text = segment.get('text', '')
+                    if segment_text:
+                        transcription_text += segment_text + " "
+                transcription_text = transcription_text.strip()
                 print(f"Successfully received transcription with new IDs")
+                return transcription_text, new_bot_id, new_workspace_id
             else:
+                print(f"Failed with new IDs: {retry_response.status_code}, {retry_response.text}")
+                return f"Unable to transcribe the audio with new credentials.", new_bot_id, new_workspace_id
         except Exception as e:
+            print(f"Error with new IDs: {str(e)}")
+            return f"Unable to transcribe the audio with new credentials: {str(e)}", new_bot_id, new_workspace_id
     # Fallback: reached when every attempt ended in a retryable status code (that branch always `continue`s)
+    return "Unable to transcribe the audio.", bot_id, workspace_id
 # -------------------------------------------------------------------
+# Flask Endpoint
 # -------------------------------------------------------------------
 @app.route("/transcribe", methods=["POST"])
 def transcribe_endpoint():
     Expects JSON with:
     {
       "file_url": "string",
+      "prompt": "string"
     }
     Returns JSON with:
     {
+      "transcription": "string"
     }
     """
     global GLOBAL_WORKSPACE_ID, GLOBAL_BOT_ID
     # Parse JSON from request
     data = request.get_json(force=True)
     file_url = data.get("file_url", "")
+    prompt = data.get("prompt", "get all text with his lang and exatract (DON'T translate) .")
     if not file_url:
         return jsonify({"error": "Missing file_url parameter"}), 400
     # If we don't yet have a workspace or bot, create them
     if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
+        print("No existing IDs found. Creating new workspace and bot...")
         GLOBAL_WORKSPACE_ID = create_workspace()
         if GLOBAL_WORKSPACE_ID:
             GLOBAL_BOT_ID = create_bot(GLOBAL_WORKSPACE_ID)
         # If creation failed
         if not GLOBAL_WORKSPACE_ID or not GLOBAL_BOT_ID:
+            return jsonify({"error": "I'm currently unavailable. Please try again later."}), 500
+    # Call our function that interacts with Botpress API
     print(f"Sending transcription request with existing bot_id={GLOBAL_BOT_ID}, workspace_id={GLOBAL_WORKSPACE_ID}")
+    transcription, updated_bot_id, updated_workspace_id = transcribe_audio(
         file_url,
         prompt,
         GLOBAL_BOT_ID,
         GLOBAL_WORKSPACE_ID
     )
     # Update global IDs if they changed
     if updated_bot_id != GLOBAL_BOT_ID or updated_workspace_id != GLOBAL_WORKSPACE_ID:
+        print(f"Updating global IDs: bot_id={updated_bot_id}, workspace_id={updated_workspace_id}")
         GLOBAL_BOT_ID = updated_bot_id
         GLOBAL_WORKSPACE_ID = updated_workspace_id
+    return jsonify({"transcription": transcription})
 # -------------------------------------------------------------------