import os
import shutil
import time
from concurrent.futures import ThreadPoolExecutor
from fastapi import APIRouter, FastAPI, File, Form, UploadFile
from fastapi.responses import HTMLResponse
from pydub import AudioSegment
from app.utils.ai_speech import pronunciation_assessment
from app.utils.common import aggregate_scores, remove_files, split_audio
from app.utils.get_scores import get_content_score
from app.models.speech_analysis import SpeechAnalysisResponse
from app.utils.constants import UPLOAD_DIR
from loguru import logger
speech_analysis_router = APIRouter()
@speech_analysis_router.post("/analyze", response_model=SpeechAnalysisResponse)
async def analyze_speech(audio_file: UploadFile = File(...), language: str = Form(...)):
    """Analyze an uploaded speech recording and return combined scores.

    Saves the upload under UPLOAD_DIR, converts MP3 to WAV, splits recordings
    longer than 30 s into chunks, then runs content scoring and per-chunk
    pronunciation assessment in parallel and merges both result dicts.

    Args:
        audio_file: Uploaded audio file (.wav, or .mp3 which is converted).
        language: Language code forwarded to the pronunciation assessor.

    Returns:
        Dict merged from the aggregated Azure speech scores and the content
        scores, validated against SpeechAnalysisResponse.
    """
    # Make audio directory if not available
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    s_time = time.time()

    # basename() guards against path traversal via a crafted filename
    # (e.g. "../../etc/passwd").
    safe_name = os.path.basename(audio_file.filename)
    audio_file_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(audio_file_path, "wb") as buffer:
        shutil.copyfileobj(audio_file.file, buffer)

    # Track every file we create so cleanup happens exactly once per path,
    # even if scoring raises.
    cleanup_paths = {audio_file_path}

    # Convert MP3 uploads to WAV (downstream tooling expects WAV).
    # Match only the extension — the old replace("mp3", "wav") would also
    # rewrite "mp3" appearing anywhere in the path, and the previous
    # convert_mp3_to_wav helper was never defined (NameError at runtime).
    if audio_file_path.lower().endswith(".mp3"):
        wav_path = os.path.splitext(audio_file_path)[0] + ".wav"
        AudioSegment.from_mp3(audio_file_path).export(wav_path, format="wav")
        audio_file_path = wav_path
        cleanup_paths.add(wav_path)

    try:
        # The pronunciation backend handles ~30 s max per request, so split
        # longer recordings into chunks.
        audio_length_ms = AudioSegment.from_wav(audio_file_path).duration_seconds * 1000
        if audio_length_ms > 30000:
            chunk_paths = split_audio(audio_file_path)
        else:
            chunk_paths = [audio_file_path]  # No splitting needed
        cleanup_paths.update(chunk_paths)

        # Content scoring and per-chunk pronunciation assessment run in parallel.
        with ThreadPoolExecutor() as executor:
            content_future = executor.submit(get_content_score, audio_file_path)
            pronunciation_futures = [
                executor.submit(pronunciation_assessment, path, language)
                for path in chunk_paths
            ]
            content_score = content_future.result()
            chunk_scores = [future.result() for future in pronunciation_futures]

        # Aggregate azure_speech_scores across chunks, then merge in the
        # content-based scores and (placeholder) feedback strings.
        final_azure_speech_score = aggregate_scores(chunk_scores)
        final_azure_speech_score.update(
            {
                "grammar_score": content_score.get("grammar_score"),
                "intonation_score": content_score.get("intonation_score"),
                "comprehension_score": content_score.get("comprehension_score"),
                "grammar_errors": content_score.get("grammar_errors"),
                "pronunciation_feedback": "Demo Content",
                "fluency_feedback": "Demo Content",
                "accuracy_feedback": "Demo Content",
                "grammar_feedback": "Demo Content",
                "intonation_feedback": "Demo Content",
                "comprehension_feedback": "Demo Content",
            }
        )
        logger.info(str(time.time() - s_time))
        return final_azure_speech_score
    finally:
        # Remove temp files even on failure; the set dedupes the original
        # path, which was previously passed to remove_files twice when no
        # splitting occurred.
        remove_files(list(cleanup_paths))
|