# speech-analysis — app/services/speech_analysis.py
# author: jaykishan-b — commit 79b7942 ("init")
import os
import shutil
import time
from concurrent.futures import ThreadPoolExecutor
from fastapi import APIRouter, FastAPI, File, Form, UploadFile
from fastapi.responses import HTMLResponse
from pydub import AudioSegment
from app.utils.ai_speech import pronunciation_assessment
from app.utils.common import aggregate_scores, remove_files, split_audio
from app.utils.get_scores import get_content_score
from app.models.speech_analysis import SpeechAnalysisResponse
from app.utils.constants import UPLOAD_DIR
from loguru import logger
speech_analysis_router = APIRouter()


def _convert_mp3_to_wav(mp3_path: str) -> str:
    """Convert an MP3 file to WAV with pydub and return the new path.

    Only the extension is swapped (not every "mp3" substring in the name),
    and the original MP3 is removed after a successful conversion so no
    stray files accumulate in the upload directory.
    """
    wav_path = os.path.splitext(mp3_path)[0] + ".wav"
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
    os.remove(mp3_path)
    return wav_path


@speech_analysis_router.post("/analyze", response_model=SpeechAnalysisResponse)
async def analyze_speech(audio_file: UploadFile = File(...), language: str = Form(...)):
    """Analyze an uploaded speech recording.

    Saves the upload, converts MP3 input to WAV, splits recordings longer
    than 30 seconds into chunks, then runs content scoring (whole file) and
    pronunciation assessment (per chunk) in parallel and aggregates the
    chunk-level pronunciation scores into one response.

    Args:
        audio_file: Uploaded audio file (WAV, or MP3 which is converted).
        language: Language code forwarded to the pronunciation assessor.

    Returns:
        A dict matching SpeechAnalysisResponse: aggregated pronunciation
        metrics merged with grammar/intonation/comprehension scores and
        (currently placeholder) feedback strings.
    """
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    s_time = time.time()

    # basename() guards against path traversal via a crafted upload
    # filename such as "../../etc/passwd".
    safe_name = os.path.basename(audio_file.filename)
    audio_file_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(audio_file_path, "wb") as buffer:
        shutil.copyfileobj(audio_file.file, buffer)

    # Compare the extension case-insensitively; a bare endswith("mp3")
    # would also match names like "foomp3" and miss ".MP3".
    if os.path.splitext(audio_file_path)[1].lower() == ".mp3":
        audio_file_path = _convert_mp3_to_wav(audio_file_path)

    chunk_paths = [audio_file_path]  # default: no splitting needed
    try:
        # Split recordings longer than 30 seconds into shorter chunks.
        audio_length_ms = AudioSegment.from_wav(audio_file_path).duration_seconds * 1000
        if audio_length_ms > 30000:
            chunk_paths = split_audio(audio_file_path)

        # Content scoring runs on the full file while pronunciation
        # assessment runs per chunk, all submitted to one thread pool.
        with ThreadPoolExecutor() as executor:
            content_future = executor.submit(get_content_score, audio_file_path)
            pronunciation_futures = [
                executor.submit(pronunciation_assessment, path, language)
                for path in chunk_paths
            ]
            content_score = content_future.result()
            chunk_scores = [future.result() for future in pronunciation_futures]

        # Aggregate pronunciation scores across chunks, then merge in the
        # content-based scores and placeholder feedback strings.
        final_azure_speech_score = aggregate_scores(chunk_scores)
        final_azure_speech_score.update(
            {
                "grammar_score": content_score.get("grammar_score"),
                "intonation_score": content_score.get("intonation_score"),
                "comprehension_score": content_score.get("comprehension_score"),
                "grammar_errors": content_score.get("grammar_errors"),
                "pronunciation_feedback": "Demo Content",
                "fluency_feedback": "Demo Content",
                "accuracy_feedback": "Demo Content",
                "grammar_feedback": "Demo Content",
                "intonation_feedback": "Demo Content",
                "comprehension_feedback": "Demo Content",
            }
        )
        total_time = time.time() - s_time
        logger.info(str(total_time))
        return final_azure_speech_score
    finally:
        # Clean up the upload and all chunks even when scoring fails.
        # dict.fromkeys de-duplicates while preserving order, so the
        # original path is not passed to remove_files twice when no
        # splitting occurred (chunk_paths already contains it).
        remove_files(list(dict.fromkeys(chunk_paths + [audio_file_path])))