Spaces:

cruvss
/

Fast_api

Sleeping

App Files Files Community

mulasagg committed on May 18, 2025

Commit

b4db241

1 Parent(s): b4aa0c5

add transcribe in hindi

Browse files

Files changed (2) hide show

app.py +7 -6
transcribe.py +23 -21

app.py CHANGED Viewed

@@ -289,14 +289,14 @@ import time
 @app.post('/transcribe/')
-async def transcribe(file: UploadFile):
     """
     Endpoint to transcribe an uploaded audio file (.wav or .mp3).
     """
     #calculate time to transcribe
     start_time = time.time()
-    if not file.filename.endswith(('.wav', '.mp3')):
-        raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
     # Generate a safe temporary file path
     temp_filename = f"temp_{uuid.uuid4()}{os.path.splitext(file.filename)[1]}"
@@ -310,7 +310,7 @@ async def transcribe(file: UploadFile):
             shutil.copyfileobj(file.file, buffer)
         # Transcribe using your custom function
-        result = transcribe_audio(temp_filepath)
         end_time = time.time()
         transcription_time = end_time - start_time
         response = {
@@ -329,9 +329,10 @@ async def transcribe(file: UploadFile):
             os.remove(temp_filepath)
 @app.post('/analyze_all/')
-async def analyze_all(file: UploadFile):
     """
     Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
     """
@@ -358,7 +359,7 @@ async def analyze_all(file: UploadFile):
         vps_result = analyze_vps_main(temp_filepath)
         ves_result = calc_voice_engagement_score(temp_filepath)
         filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
-        transcript = transcribe_audio(temp_filepath)
         # Combine results into a single response
         combined_result = {

 @app.post('/transcribe/')
+async def transcribe(file: UploadFile, language: str = Form(...)):
     """
     Endpoint to transcribe an uploaded audio file (.wav or .mp3).
     """
     #calculate time to transcribe
     start_time = time.time()
+    if not file.filename.endswith(('.wav', '.mp3','mp4')):
+        raise HTTPException(status_code=400, detail="Invalid file type. Only .wav ,mp4 and .mp3 files are supported.")
     # Generate a safe temporary file path
     temp_filename = f"temp_{uuid.uuid4()}{os.path.splitext(file.filename)[1]}"
             shutil.copyfileobj(file.file, buffer)
         # Transcribe using your custom function
+        result = transcribe_audio(temp_filepath, language=language, model_size="base")
         end_time = time.time()
         transcription_time = end_time - start_time
         response = {
             os.remove(temp_filepath)
+from fastapi import UploadFile, Form
 @app.post('/analyze_all/')
+async def analyze_all(file: UploadFile, language: str = Form(...)):
     """
     Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
     """
         vps_result = analyze_vps_main(temp_filepath)
         ves_result = calc_voice_engagement_score(temp_filepath)
         filler_count = analyze_fillers(temp_filepath)  # Assuming this function returns a dict with filler count
+        transcript = transcribe_audio(temp_filepath, language, "base") #fix this
         # Combine results into a single response
         combined_result = {

transcribe.py CHANGED Viewed

@@ -1,24 +1,26 @@
-# using whisper to transcribe audio files
-import whisper
-import os
-def transcribe_audio(file_path, model_size="base"):
-    """
-    Transcribe audio file using Whisper model.
-    Args:
-        file_path (str): Path to the audio file.
-        model_size (str): Size of the Whisper model to use. Options are "tiny", "base", "small", "medium", "large".
-    Returns:
-        str: Transcription of the audio file.
-    """
-    # Load the Whisper model
-    model = whisper.load_model(model_size)
-    # Transcribe the audio file
-    result = model.transcribe(file_path, fp16=False)
-    # Return the transcription
-    return result["text"]

+import assemblyai as aai
+# Set your AssemblyAI API key once
+aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4"  # Replace with env var for production
+def transcribe_audio(file_path: str, language, model_size=None) -> str:
+    print(f"Transcribing audio file: {file_path} with language: {language}")
+    # Configure for Hindi language
+    config = aai.TranscriptionConfig(
+        speech_model=aai.SpeechModel.best,
+        language_code=language
+    )
+    # Create transcriber instance
+    transcriber = aai.Transcriber(config=config)
+    # Perform transcription
+    transcript = transcriber.transcribe(file_path)
+    # Check if successful
+    if transcript.status == "error":
+        raise RuntimeError(f"Transcription failed: {transcript.error}")
+    return transcript.text