Spaces:

mulasagg
/

Voice

Sleeping

App Files Files Community

mulasagg committed on May 20, 2025

Commit

27acc7d

1 Parent(s): 8031a8f

push new

Browse files

Files changed (3) hide show

app.py +8 -11
transcribe.py +14 -20
vers/compute_vers_score.py +3 -0

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ from vps.vps_api import main as analyze_vps_main
 from ves.ves import calc_voice_engagement_score
 from transcribe import transcribe_audio
 from filler_count.filler_score import analyze_fillers
-#from emotion.emo_predict import predict_emotion
 app = FastAPI()
@@ -290,9 +290,9 @@ import time
 @app.post('/transcribe/')
-async def transcribe(file: UploadFile):
     """
-    Endpoint to transcribe an uploaded audio file ('.wav', '.mp3','mp4','.m4a','.flac' ).
     """
     #calculate time to transcribe
     start_time = time.time()
@@ -311,7 +311,7 @@ async def transcribe(file: UploadFile):
             shutil.copyfileobj(file.file, buffer)
         # Transcribe using your custom function
-        result = transcribe_audio(temp_filepath,  model_size="base")
         end_time = time.time()
         transcription_time = end_time - start_time
         response = {
@@ -329,14 +329,12 @@ async def transcribe(file: UploadFile):
         if os.path.exists(temp_filepath):
             os.remove(temp_filepath)
-import datetime
 @app.post('/analyze_all/')
-async def analyze_all(file: UploadFile):
     """
     Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
     """
-    print(f"Received request at {datetime.datetime.now()} for file: {file.filename}")
     if not file.filename.endswith(('.wav', '.mp3','.m4a','.mp4','.flac')):
         raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
@@ -360,8 +358,8 @@ async def analyze_all(file: UploadFile):
         vps_result = analyze_vps_main(temp_filepath)
         ves_result = calc_voice_engagement_score(temp_filepath)
         filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
-        transcript, language, _ = transcribe_audio(temp_filepath, "base") #fix this
-        #emotion = predict_emotion(temp_filepath)
         avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
@@ -376,8 +374,7 @@ async def analyze_all(file: UploadFile):
             "ves": ves_result,
             "filler_words": filler_count,
             "transcript": transcript,
-            "Detected Language": language,
-            #"emotion": emotion ,
             "sank_score": avg_score
         }

 from ves.ves import calc_voice_engagement_score
 from transcribe import transcribe_audio
 from filler_count.filler_score import analyze_fillers
+from emotion.emo_predict import predict_emotion
 app = FastAPI()
 @app.post('/transcribe/')
+async def transcribe(file: UploadFile, language: str = Form(...)):
     """
+    Endpoint to transcribe an uploaded audio file (.wav or .mp3).
     """
     #calculate time to transcribe
     start_time = time.time()
             shutil.copyfileobj(file.file, buffer)
         # Transcribe using your custom function
+        result = transcribe_audio(temp_filepath, language=language, model_size="base")
         end_time = time.time()
         transcription_time = end_time - start_time
         response = {
         if os.path.exists(temp_filepath):
             os.remove(temp_filepath)
 @app.post('/analyze_all/')
+async def analyze_all(file: UploadFile, language: str = Form(...)):
     """
     Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
     """
     if not file.filename.endswith(('.wav', '.mp3','.m4a','.mp4','.flac')):
         raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
         vps_result = analyze_vps_main(temp_filepath)
         ves_result = calc_voice_engagement_score(temp_filepath)
         filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
+        transcript = transcribe_audio(temp_filepath, language, "base") #fix this
+        emotion = predict_emotion(temp_filepath)
         avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
             "ves": ves_result,
             "filler_words": filler_count,
             "transcript": transcript,
+            "emotion": emotion ,
             "sank_score": avg_score
         }

transcribe.py CHANGED Viewed

@@ -1,32 +1,26 @@
 import assemblyai as aai
-aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4"  # Use env var in production
-def transcribe_audio(file_path: str, model_size=None) -> tuple[str, str, float]:
-    print(f"Transcribing audio file: {file_path} with language detection")
     config = aai.TranscriptionConfig(
         speech_model=aai.SpeechModel.nano,
-        language_detection=True,
-        language_confidence_threshold=0.4
     )
-    transcriber = aai.Transcriber()
-    transcript = transcriber.transcribe(file_path, config)
     if transcript.status == "error":
         raise RuntimeError(f"Transcription failed: {transcript.error}")
-    # Access detected language and confidence from json_response
-    response = transcript.json_response
-    language = response.get("language_code")
-    confidence = response.get("language_confidence")
-    result = {
-        "transcript": transcript.text,
-        "language": language,
-        "confidence": confidence
-    }
-    return transcript.text, language, confidence

 import assemblyai as aai
+# Set your AssemblyAI API key once
+aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4"  # Replace with env var for production
+def transcribe_audio(file_path: str, language, model_size=None) -> str:
+    print(f"Transcribing audio file: {file_path} with language: {language}")
+    # Configure for Hindi language
     config = aai.TranscriptionConfig(
         speech_model=aai.SpeechModel.nano,
+        language_code=language
     )
+    # Create transcriber instance
+    transcriber = aai.Transcriber(config=config)
+    # Perform transcription
+    transcript = transcriber.transcribe(file_path)
+    # Check if successful
     if transcript.status == "error":
         raise RuntimeError(f"Transcription failed: {transcript.error}")
+    return transcript.text

vers/compute_vers_score.py CHANGED Viewed

@@ -79,4 +79,7 @@ def compute_vers_score(file_path: str, whisper_model) -> dict:
         volume_std=volume_std,
         valence_scores=valence_scores
     )
     return vers_result

         volume_std=volume_std,
         valence_scores=valence_scores
     )
+    # Include transcript optionally
+    vers_result["transcript"] = transcript
     return vers_result