VoiceMenu20

Sleeping

App Files Files Community

DSatishchandra committed on Feb 5, 2025

Commit

0989798

verified ·

1 Parent(s): 1ea8f0f

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -18

app.py CHANGED Viewed

@@ -5,28 +5,20 @@ from transformers import pipeline
 from gtts import gTTS
 from pydub import AudioSegment
 from pydub.silence import detect_nonsilent
 from waitress import serve
 from simple_salesforce import Salesforce
-import time
-from transformers import pipeline
 app = Flask(__name__)
-retry_attempts = 3
-timeout = 60  # 1 minute timeout for each attempt
-model = None
-for attempt in range(retry_attempts):
-    try:
-        model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if torch.cuda.is_available() else -1, config={"timeout": timeout})
-        print("Model loaded successfully!")
-        break
-    except requests.exceptions.ReadTimeout:
-        print(f"Timeout occurred, retrying attempt {attempt + 1}/{retry_attempts}...")
-        time.sleep(5)  # Retry after 5 seconds
 # Use whisper-small for faster processing and better speed
 device = "cuda" if torch.cuda.is_available() else "cpu"
-asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
 # Function to generate audio prompts
 def generate_audio_prompt(text, filename):
@@ -63,7 +55,6 @@ def convert_to_wav(input_path, output_path):
         audio = AudioSegment.from_file(input_path)
         audio = audio.set_frame_rate(16000).set_channels(1)  # Convert to 16kHz, mono
         audio.export(output_path, format="wav")
-        print(f"Converted audio to {output_path}")
     except Exception as e:
         print(f"Error: {str(e)}")
         raise Exception(f"Audio conversion failed: {str(e)}")
@@ -110,7 +101,6 @@ def create_salesforce_record(name, email, phone_number):
         print(f"Error creating Salesforce record: {error_message}")
         return {"error": f"Failed to create record in Salesforce: {error_message}"}
 @app.route("/")
 def index():
     return render_template("index.html")
@@ -137,7 +127,20 @@ def transcribe():
             print("Audio contains speech, proceeding with transcription.")
         # Use Whisper ASR model for transcription
-        result = asr_model(output_audio_path, generate_kwargs={"language": "en"})
         transcribed_text = result["text"].strip().capitalize()
         print(f"Transcribed text: {transcribed_text}")

 from gtts import gTTS
 from pydub import AudioSegment
 from pydub.silence import detect_nonsilent
+from transformers import AutoConfig  # Import AutoConfig for the config object
+import time
 from waitress import serve
 from simple_salesforce import Salesforce
+import requests  # Import requests for exception handling
 app = Flask(__name__)
 # Use whisper-small for faster processing and better speed
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Create config object to set timeout and other parameters
+config = AutoConfig.from_pretrained("openai/whisper-small")
+config.update({"timeout": 60})  # Set timeout to 60 seconds
 # Function to generate audio prompts
 def generate_audio_prompt(text, filename):
         audio = AudioSegment.from_file(input_path)
         audio = audio.set_frame_rate(16000).set_channels(1)  # Convert to 16kHz, mono
         audio.export(output_path, format="wav")
     except Exception as e:
         print(f"Error: {str(e)}")
         raise Exception(f"Audio conversion failed: {str(e)}")
         print(f"Error creating Salesforce record: {error_message}")
         return {"error": f"Failed to create record in Salesforce: {error_message}"}
 @app.route("/")
 def index():
     return render_template("index.html")
             print("Audio contains speech, proceeding with transcription.")
         # Use Whisper ASR model for transcription
+        result = None
+        retry_attempts = 3
+        for attempt in range(retry_attempts):
+            try:
+                result = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if torch.cuda.is_available() else -1, config=config)
+                print(f"Transcribed text: {result['text']}")
+                break
+            except requests.exceptions.ReadTimeout:
+                print(f"Timeout occurred, retrying attempt {attempt + 1}/{retry_attempts}...")
+                time.sleep(5)
+        if result is None:
+            return jsonify({"error": "Unable to transcribe audio after retries."}), 500
         transcribed_text = result["text"].strip().capitalize()
         print(f"Transcribed text: {transcribed_text}")