recover from backup
Browse files
- .gitignore +1 -0
- app.py +11 -8
- emotion/__pycache__/emo_predict.cpython-312.pyc +0 -0
- filler_count/__pycache__/filler_score.cpython-312.pyc +0 -0
- tone_modulation/__pycache__/tone_api.cpython-312.pyc +0 -0
- transcribe.py +20 -14
- vers/__pycache__/compute_vers_score.cpython-312.pyc +0 -0
- vers/compute_vers_score.py +0 -3
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
**/__pycache__/
|
app.py
CHANGED
|
@@ -18,7 +18,7 @@ from vps.vps_api import main as analyze_vps_main
|
|
| 18 |
from ves.ves import calc_voice_engagement_score
|
| 19 |
from transcribe import transcribe_audio
|
| 20 |
from filler_count.filler_score import analyze_fillers
|
| 21 |
-
from emotion.emo_predict import predict_emotion
|
| 22 |
|
| 23 |
app = FastAPI()
|
| 24 |
|
|
@@ -290,9 +290,9 @@ import time
|
|
| 290 |
|
| 291 |
|
| 292 |
@app.post('/transcribe/')
|
| 293 |
-
async def transcribe(file: UploadFile
|
| 294 |
"""
|
| 295 |
-
Endpoint to transcribe an uploaded audio file (.wav
|
| 296 |
"""
|
| 297 |
#calculate time to transcribe
|
| 298 |
start_time = time.time()
|
|
@@ -311,7 +311,7 @@ async def transcribe(file: UploadFile, language: str = Form(...)):
|
|
| 311 |
shutil.copyfileobj(file.file, buffer)
|
| 312 |
|
| 313 |
# Transcribe using your custom function
|
| 314 |
-
result = transcribe_audio(temp_filepath,
|
| 315 |
end_time = time.time()
|
| 316 |
transcription_time = end_time - start_time
|
| 317 |
response = {
|
|
@@ -329,12 +329,14 @@ async def transcribe(file: UploadFile, language: str = Form(...)):
|
|
| 329 |
if os.path.exists(temp_filepath):
|
| 330 |
os.remove(temp_filepath)
|
| 331 |
|
|
|
|
| 332 |
|
| 333 |
@app.post('/analyze_all/')
|
| 334 |
-
async def analyze_all(file: UploadFile
|
| 335 |
"""
|
| 336 |
Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
|
| 337 |
"""
|
|
|
|
| 338 |
if not file.filename.endswith(('.wav', '.mp3','.m4a','.mp4','.flac')):
|
| 339 |
raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
|
| 340 |
|
|
@@ -358,8 +360,8 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
|
|
| 358 |
vps_result = analyze_vps_main(temp_filepath)
|
| 359 |
ves_result = calc_voice_engagement_score(temp_filepath)
|
| 360 |
filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
|
| 361 |
-
transcript = transcribe_audio(temp_filepath,
|
| 362 |
-
emotion = predict_emotion(temp_filepath)
|
| 363 |
avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
|
| 364 |
|
| 365 |
|
|
@@ -374,7 +376,8 @@ async def analyze_all(file: UploadFile, language: str = Form(...)):
|
|
| 374 |
"ves": ves_result,
|
| 375 |
"filler_words": filler_count,
|
| 376 |
"transcript": transcript,
|
| 377 |
-
"
|
|
|
|
| 378 |
"sank_score": avg_score
|
| 379 |
}
|
| 380 |
|
|
|
|
| 18 |
from ves.ves import calc_voice_engagement_score
|
| 19 |
from transcribe import transcribe_audio
|
| 20 |
from filler_count.filler_score import analyze_fillers
|
| 21 |
+
#from emotion.emo_predict import predict_emotion
|
| 22 |
|
| 23 |
app = FastAPI()
|
| 24 |
|
|
|
|
| 290 |
|
| 291 |
|
| 292 |
@app.post('/transcribe/')
|
| 293 |
+
async def transcribe(file: UploadFile):
|
| 294 |
"""
|
| 295 |
+
Endpoint to transcribe an uploaded audio file ('.wav', '.mp3','mp4','.m4a','.flac' ).
|
| 296 |
"""
|
| 297 |
#calculate time to transcribe
|
| 298 |
start_time = time.time()
|
|
|
|
| 311 |
shutil.copyfileobj(file.file, buffer)
|
| 312 |
|
| 313 |
# Transcribe using your custom function
|
| 314 |
+
result = transcribe_audio(temp_filepath, model_size="base")
|
| 315 |
end_time = time.time()
|
| 316 |
transcription_time = end_time - start_time
|
| 317 |
response = {
|
|
|
|
| 329 |
if os.path.exists(temp_filepath):
|
| 330 |
os.remove(temp_filepath)
|
| 331 |
|
| 332 |
+
import datetime
|
| 333 |
|
| 334 |
@app.post('/analyze_all/')
|
| 335 |
+
async def analyze_all(file: UploadFile):
|
| 336 |
"""
|
| 337 |
Endpoint to analyze all aspects of an uploaded audio file (.wav or .mp3).
|
| 338 |
"""
|
| 339 |
+
print(f"Received request at {datetime.datetime.now()} for file: {file.filename}")
|
| 340 |
if not file.filename.endswith(('.wav', '.mp3','.m4a','.mp4','.flac')):
|
| 341 |
raise HTTPException(status_code=400, detail="Invalid file type. Only .wav and .mp3 files are supported.")
|
| 342 |
|
|
|
|
| 360 |
vps_result = analyze_vps_main(temp_filepath)
|
| 361 |
ves_result = calc_voice_engagement_score(temp_filepath)
|
| 362 |
filler_count = analyze_fillers(temp_filepath) # Assuming this function returns a dict with filler count
|
| 363 |
+
transcript, language, _ = transcribe_audio(temp_filepath, "base") #fix this
|
| 364 |
+
#emotion = predict_emotion(temp_filepath)
|
| 365 |
avg_score = (fluency_result['fluency_score'] + tone_result['speech_dynamism_score'] + vcs_result['Voice Clarity Sore'] + vers_result['VERS Score'] + voice_confidence_result['voice_confidence_score'] + vps_result['VPS'] + ves_result['ves']) / 7
|
| 366 |
|
| 367 |
|
|
|
|
| 376 |
"ves": ves_result,
|
| 377 |
"filler_words": filler_count,
|
| 378 |
"transcript": transcript,
|
| 379 |
+
"Detected Language": language,
|
| 380 |
+
#"emotion": emotion ,
|
| 381 |
"sank_score": avg_score
|
| 382 |
}
|
| 383 |
|
emotion/__pycache__/emo_predict.cpython-312.pyc
CHANGED
|
Binary files a/emotion/__pycache__/emo_predict.cpython-312.pyc and b/emotion/__pycache__/emo_predict.cpython-312.pyc differ
|
|
|
filler_count/__pycache__/filler_score.cpython-312.pyc
CHANGED
|
Binary files a/filler_count/__pycache__/filler_score.cpython-312.pyc and b/filler_count/__pycache__/filler_score.cpython-312.pyc differ
|
|
|
tone_modulation/__pycache__/tone_api.cpython-312.pyc
CHANGED
|
Binary files a/tone_modulation/__pycache__/tone_api.cpython-312.pyc and b/tone_modulation/__pycache__/tone_api.cpython-312.pyc differ
|
|
|
transcribe.py
CHANGED
|
@@ -1,26 +1,32 @@
|
|
| 1 |
import assemblyai as aai
|
| 2 |
|
| 3 |
-
|
| 4 |
-
aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4" # Replace with env var for production
|
| 5 |
|
| 6 |
-
def transcribe_audio(file_path: str,
|
|
|
|
| 7 |
|
| 8 |
-
print(f"Transcribing audio file: {file_path} with language: {language}")
|
| 9 |
-
# Configure for Hindi language
|
| 10 |
config = aai.TranscriptionConfig(
|
| 11 |
speech_model=aai.SpeechModel.nano,
|
| 12 |
-
|
|
|
|
| 13 |
)
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
|
|
|
| 17 |
|
| 18 |
-
# Perform transcription
|
| 19 |
-
transcript = transcriber.transcribe(file_path)
|
| 20 |
-
|
| 21 |
-
# Check if successful
|
| 22 |
if transcript.status == "error":
|
| 23 |
raise RuntimeError(f"Transcription failed: {transcript.error}")
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import assemblyai as aai
|
| 2 |
|
| 3 |
+
aai.settings.api_key = "2c02e1bdab874068bdcfb2e226f048a4" # Use env var in production
|
|
|
|
| 4 |
|
| 5 |
+
def transcribe_audio(file_path: str, model_size=None) -> tuple[str, str, float]:
|
| 6 |
+
print(f"Transcribing audio file: {file_path} with language detection")
|
| 7 |
|
|
|
|
|
|
|
| 8 |
config = aai.TranscriptionConfig(
|
| 9 |
speech_model=aai.SpeechModel.nano,
|
| 10 |
+
language_detection=True,
|
| 11 |
+
language_confidence_threshold=0.4
|
| 12 |
)
|
| 13 |
|
| 14 |
+
transcriber = aai.Transcriber()
|
| 15 |
+
|
| 16 |
+
transcript = transcriber.transcribe(file_path, config)
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
if transcript.status == "error":
|
| 19 |
raise RuntimeError(f"Transcription failed: {transcript.error}")
|
| 20 |
|
| 21 |
+
# Access detected language and confidence from json_response
|
| 22 |
+
response = transcript.json_response
|
| 23 |
+
language = response.get("language_code")
|
| 24 |
+
confidence = response.get("language_confidence")
|
| 25 |
+
|
| 26 |
+
result = {
|
| 27 |
+
"transcript": transcript.text,
|
| 28 |
+
"language": language,
|
| 29 |
+
"confidence": confidence
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
return transcript.text, language, confidence
|
vers/__pycache__/compute_vers_score.cpython-312.pyc
CHANGED
|
Binary files a/vers/__pycache__/compute_vers_score.cpython-312.pyc and b/vers/__pycache__/compute_vers_score.cpython-312.pyc differ
|
|
|
vers/compute_vers_score.py
CHANGED
|
@@ -79,7 +79,4 @@ def compute_vers_score(file_path: str, whisper_model) -> dict:
|
|
| 79 |
volume_std=volume_std,
|
| 80 |
valence_scores=valence_scores
|
| 81 |
)
|
| 82 |
-
|
| 83 |
-
# Include transcript optionally
|
| 84 |
-
vers_result["transcript"] = transcript
|
| 85 |
return vers_result
|
|
|
|
| 79 |
volume_std=volume_std,
|
| 80 |
valence_scores=valence_scores
|
| 81 |
)
|
|
|
|
|
|
|
|
|
|
| 82 |
return vers_result
|