Spaces:
Sleeping
Sleeping
Update process_interview.py
Browse files- process_interview.py +26 -19
process_interview.py
CHANGED
|
@@ -35,12 +35,14 @@ import google.generativeai as genai
|
|
| 35 |
import joblib
|
| 36 |
from concurrent.futures import ThreadPoolExecutor
|
| 37 |
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
|
|
|
|
| 38 |
# Setup logging
|
| 39 |
logging.basicConfig(level=logging.INFO)
|
| 40 |
logger = logging.getLogger(__name__)
|
| 41 |
logging.getLogger("nemo_logging").setLevel(logging.ERROR)
|
| 42 |
logging.getLogger("nemo").setLevel(logging.ERROR)
|
| 43 |
|
|
|
|
| 44 |
# Configuration
|
| 45 |
AUDIO_DIR = "./uploads"
|
| 46 |
OUTPUT_DIR = "./processed_audio"
|
|
@@ -112,17 +114,24 @@ def load_models():
|
|
| 112 |
speaker_model, nlp, tokenizer, llm_model = load_models()
|
| 113 |
|
| 114 |
|
| 115 |
-
|
| 116 |
-
def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
|
| 117 |
try:
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
return
|
| 126 |
except Exception as e:
|
| 127 |
logger.error(f"Audio conversion failed: {str(e)}")
|
| 128 |
raise
|
|
@@ -1032,7 +1041,6 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 1032 |
logger.error(f"PDF generation failed: {str(e)}", exc_info=True)
|
| 1033 |
return False
|
| 1034 |
|
| 1035 |
-
|
| 1036 |
def convert_to_serializable(obj):
|
| 1037 |
if isinstance(obj, np.generic):
|
| 1038 |
return obj.item()
|
|
@@ -1044,7 +1052,6 @@ def convert_to_serializable(obj):
|
|
| 1044 |
return obj.tolist()
|
| 1045 |
return obj
|
| 1046 |
|
| 1047 |
-
|
| 1048 |
def process_interview(audio_path: str):
|
| 1049 |
try:
|
| 1050 |
logger.info(f"Starting processing for {audio_path}")
|
|
@@ -1053,6 +1060,12 @@ def process_interview(audio_path: str):
|
|
| 1053 |
|
| 1054 |
logger.info("Starting transcription")
|
| 1055 |
transcript = transcribe(wav_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1056 |
|
| 1057 |
logger.info("Extracting prosodic features")
|
| 1058 |
for utterance in transcript['utterances']:
|
|
@@ -1066,9 +1079,6 @@ def process_interview(audio_path: str):
|
|
| 1066 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
| 1067 |
|
| 1068 |
logger.info("Classifying roles")
|
| 1069 |
-
# Ensure role classifier models are loaded/trained only once if possible,
|
| 1070 |
-
# or handled carefully in a multi-threaded context.
|
| 1071 |
-
# For simplicity, keeping it inside process_interview for now.
|
| 1072 |
if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
|
| 1073 |
clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
|
| 1074 |
vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
|
|
@@ -1091,10 +1101,8 @@ def process_interview(audio_path: str):
|
|
| 1091 |
}
|
| 1092 |
}
|
| 1093 |
|
| 1094 |
-
# --- Calculate Acceptance Probability ---
|
| 1095 |
acceptance_probability = calculate_acceptance_probability(analysis_data)
|
| 1096 |
analysis_data['acceptance_probability'] = acceptance_probability
|
| 1097 |
-
# --- End Acceptance Probability ---
|
| 1098 |
|
| 1099 |
logger.info("Generating report text using Gemini")
|
| 1100 |
gemini_report_text = generate_report(analysis_data)
|
|
@@ -1108,7 +1116,7 @@ def process_interview(audio_path: str):
|
|
| 1108 |
serializable_data = convert_to_serializable(analysis_data)
|
| 1109 |
json.dump(serializable_data, f, indent=2)
|
| 1110 |
|
| 1111 |
-
os.remove(wav_file)
|
| 1112 |
|
| 1113 |
logger.info(f"Processing completed for {audio_path}")
|
| 1114 |
return {
|
|
@@ -1117,7 +1125,6 @@ def process_interview(audio_path: str):
|
|
| 1117 |
}
|
| 1118 |
except Exception as e:
|
| 1119 |
logger.error(f"Processing failed: {str(e)}", exc_info=True)
|
| 1120 |
-
# Clean up wav_file in case of error
|
| 1121 |
if 'wav_file' in locals() and os.path.exists(wav_file):
|
| 1122 |
os.remove(wav_file)
|
| 1123 |
-
raise
|
|
|
|
| 35 |
import joblib
|
| 36 |
from concurrent.futures import ThreadPoolExecutor
|
| 37 |
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
|
| 38 |
+
import subprocess
|
| 39 |
# Setup logging
|
| 40 |
logging.basicConfig(level=logging.INFO)
|
| 41 |
logger = logging.getLogger(__name__)
|
| 42 |
logging.getLogger("nemo_logging").setLevel(logging.ERROR)
|
| 43 |
logging.getLogger("nemo").setLevel(logging.ERROR)
|
| 44 |
|
| 45 |
+
|
| 46 |
# Configuration
|
| 47 |
AUDIO_DIR = "./uploads"
|
| 48 |
OUTPUT_DIR = "./processed_audio"
|
|
|
|
| 114 |
speaker_model, nlp, tokenizer, llm_model = load_models()
|
| 115 |
|
| 116 |
|
| 117 |
+
def convert_to_wav(input_path: str, output_dir: str = OUTPUT_DIR) -> str:
    """Convert an arbitrary audio/video file to 16 kHz mono 16-bit PCM WAV via ffmpeg.

    Args:
        input_path: Path to the source media file (any format ffmpeg can read).
        output_dir: Directory for the converted file; created if missing.
            Defaults to the module-level OUTPUT_DIR.

    Returns:
        Path to the newly created .wav file (random UUID filename).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits non-zero (stderr is logged).
        Exception: Any other failure is logged and re-raised unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Random filename avoids collisions when several uploads are processed concurrently.
    output_path = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
    command = [
        'ffmpeg', '-y',          # overwrite output without prompting
        '-i', input_path,
        '-vn',                   # ignore video stream completely
        '-acodec', 'pcm_s16le',  # 16-bit little-endian PCM
        '-ar', '16000',          # 16 kHz sample rate (typical ASR input rate)
        '-ac', '1',              # downmix to mono
        output_path,
    ]
    try:
        # Capture stderr so a failure carries ffmpeg's actual diagnostic,
        # not just "returned non-zero exit status 1".
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Audio conversion failed: {e.stderr or str(e)}")
        # Don't leave a truncated/partial wav behind on failure.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise
    except Exception as e:
        logger.error(f"Audio conversion failed: {str(e)}")
        if os.path.exists(output_path):
            os.remove(output_path)
        raise

    size_in_mb = os.path.getsize(output_path) / (1024 * 1024)
    logger.info(f"WAV file size: {size_in_mb:.2f} MB")
    return output_path
|
|
|
|
| 1041 |
logger.error(f"PDF generation failed: {str(e)}", exc_info=True)
|
| 1042 |
return False
|
| 1043 |
|
|
|
|
| 1044 |
def convert_to_serializable(obj):
|
| 1045 |
if isinstance(obj, np.generic):
|
| 1046 |
return obj.item()
|
|
|
|
| 1052 |
return obj.tolist()
|
| 1053 |
return obj
|
| 1054 |
|
|
|
|
| 1055 |
def process_interview(audio_path: str):
|
| 1056 |
try:
|
| 1057 |
logger.info(f"Starting processing for {audio_path}")
|
|
|
|
| 1060 |
|
| 1061 |
logger.info("Starting transcription")
|
| 1062 |
transcript = transcribe(wav_file)
|
| 1063 |
+
logger.info("Transcript result: %s", transcript)
|
| 1064 |
+
|
| 1065 |
+
# Check transcript validity
|
| 1066 |
+
if not transcript or 'utterances' not in transcript or not transcript['utterances']:
|
| 1067 |
+
logger.error("Transcription failed or returned empty utterances")
|
| 1068 |
+
raise ValueError("Transcription failed or returned empty utterances")
|
| 1069 |
|
| 1070 |
logger.info("Extracting prosodic features")
|
| 1071 |
for utterance in transcript['utterances']:
|
|
|
|
| 1079 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
| 1080 |
|
| 1081 |
logger.info("Classifying roles")
|
|
|
|
|
|
|
|
|
|
| 1082 |
if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
|
| 1083 |
clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
|
| 1084 |
vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
|
|
|
|
| 1101 |
}
|
| 1102 |
}
|
| 1103 |
|
|
|
|
| 1104 |
acceptance_probability = calculate_acceptance_probability(analysis_data)
|
| 1105 |
analysis_data['acceptance_probability'] = acceptance_probability
|
|
|
|
| 1106 |
|
| 1107 |
logger.info("Generating report text using Gemini")
|
| 1108 |
gemini_report_text = generate_report(analysis_data)
|
|
|
|
| 1116 |
serializable_data = convert_to_serializable(analysis_data)
|
| 1117 |
json.dump(serializable_data, f, indent=2)
|
| 1118 |
|
| 1119 |
+
os.remove(wav_file)
|
| 1120 |
|
| 1121 |
logger.info(f"Processing completed for {audio_path}")
|
| 1122 |
return {
|
|
|
|
| 1125 |
}
|
| 1126 |
except Exception as e:
|
| 1127 |
logger.error(f"Processing failed: {str(e)}", exc_info=True)
|
|
|
|
| 1128 |
if 'wav_file' in locals() and os.path.exists(wav_file):
|
| 1129 |
os.remove(wav_file)
|
| 1130 |
+
raise
|