Spaces:
Sleeping
Sleeping
Update live.py
Browse files
live.py
CHANGED
|
@@ -1,323 +1,323 @@
|
|
| 1 |
-
import warnings
|
| 2 |
-
from functions.models import models_dict
|
| 3 |
-
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
|
| 4 |
-
import os
|
| 5 |
-
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
|
| 6 |
-
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
|
| 7 |
-
import logging
|
| 8 |
-
logging.getLogger('absl').setLevel(logging.ERROR)
|
| 9 |
-
from moviepy.editor import VideoFileClip
|
| 10 |
-
import pandas as pd
|
| 11 |
-
from tqdm import tqdm
|
| 12 |
-
import time
|
| 13 |
-
import json
|
| 14 |
-
import cv2
|
| 15 |
-
import dlib
|
| 16 |
-
from collections import Counter
|
| 17 |
-
import statistics
|
| 18 |
-
import shutil
|
| 19 |
-
import asyncio
|
| 20 |
-
import traceback
|
| 21 |
-
|
| 22 |
-
from functions.valence_arousal import va_predict
|
| 23 |
-
from functions.speech import speech_predict
|
| 24 |
-
from functions.eye_track import Facetrack, eye_track_predict
|
| 25 |
-
from functions.fer import extract_face,fer_predict,plot_graph,filter
|
| 26 |
-
# from app.utils.session import send_analytics, send_individual_analytics_files, send_combined_analytics_files, send_error
|
| 27 |
-
# from app.utils.socket import ConnectionManager
|
| 28 |
-
from typing import Callable
|
| 29 |
-
# Per-session analytics accumulator, keyed by session uid.
session_data = {}

# Face detector and landmark predictor.
dnn_net = models_dict['face'][0]
predictor = models_dict['face'][1]

# Speech-emotion model plus the VAD (valence/arousal/dominance) lexicon paths.
speech_model = models_dict['speech']
valence_dict_path = models_dict['vad'][0]
arousal_dict_path = models_dict['vad'][1]
dominance_dict_path = models_dict['vad'][2]

# Valence/arousal regressor, its feature extractor, and the FER classifier.
valence_arousal_model = models_dict['valence_fer'][1]
val_ar_feat_model = models_dict['valence_fer'][0]
fer_model = models_dict['fer']
|
| 39 |
-
|
| 40 |
-
def analyze_live_video(video_path: str, uid: str, user_id: str, count: int, final: bool, log: Callable[[str], None]):
    """Analyze one recorded answer video and accumulate per-session analytics.

    Runs face extraction, eye tracking, facial-expression recognition,
    valence/arousal/stress estimation and speech-emotion analysis on the
    given video, storing per-question results in the module-level
    ``session_data`` dict keyed by ``uid``.  For non-final questions a
    ``<count>.json`` marker file is written so the final invocation can wait
    for all earlier questions to finish; on the final question the gathered
    results are sorted, combined, plotted and written under ``output/<uid>/``.

    Args:
        video_path: Path to the recorded video file (anything moviepy reads).
        uid: Session identifier; also names the output sub-directory.
        user_id: Id of the user being analyzed (currently only logged).
        count: 1-based question index of this video within the session.
        final: True when this is the last question of the session.
        log: Callback used to report progress messages to the caller.
    """
    output_dir = None  # assigned early so the except-block can reference it safely
    try:
        # Per-session accumulator shared across calls for the same uid.
        global session_data
        if uid not in session_data:
            session_data[uid] = {
                "vcount": [],
                "duration": [],

                "eye": [],

                "fer": [],
                "valence": [],
                "arousal": [],
                "stress": [],

                "blinks": [],
                "class_wise_frame_counts": [],

                "speech_emotions": [],
                "speech_data": [],
                "word_weights_list": [],
            }
        print(f"UID: {uid}, User ID: {user_id}, Count: {count}, Final: {final}, Video: {video_path}")
        log(f"Analyzing video for question - {count}")

        output_dir = os.path.join('output', str(uid))
        print(output_dir)
        os.makedirs(output_dir, exist_ok=True)

        # The final call must not combine results before every earlier
        # question has finished and dropped its <i>.json marker file.
        if final and count > 1:
            for i in range(1, count):
                previous_file_name = os.path.join(output_dir, f"{i}.json")
                print(previous_file_name)
                while not os.path.exists(previous_file_name):
                    time.sleep(1)

        video_clip = VideoFileClip(video_path)
        video_clip = video_clip.set_fps(30)
        print("Duration: ", video_clip.duration)
        session_data[uid]['vcount'].append(count)
        session_data[uid]['duration'].append(video_clip.duration)
        fps = video_clip.fps
        audio = video_clip.audio
        audio_path = os.path.join(output_dir, 'extracted_audio.wav')
        audio.write_audiofile(audio_path)
        video_frames = [frame for frame in video_clip.iter_frames()]

        # Face extraction
        print("extracting faces")
        faces = [extract_face(frame, dnn_net, predictor) for frame in tqdm(video_frames)]
        print(f'{len([face for face in faces if face is not None])} faces found.')

        # Eye tracking
        fc = Facetrack()
        log(f"Extracting eye features for question - {count}")
        eye_preds, blink_durations, total_blinks = eye_track_predict(fc, faces, fps)
        print(len(eye_preds))
        print("total_blinks- ", total_blinks)
        session_data[uid]['eye'].append(eye_preds)
        session_data[uid]['blinks'].append(blink_durations)

        # Facial expression recognition
        log(f"Extracting facial features for question - {count}")
        fer_emotions, class_wise_frame_count, em_tensors = fer_predict(faces, fps, fer_model)
        print("face emotions", len(fer_emotions))
        session_data[uid]['fer'].append(fer_emotions)
        session_data[uid]['class_wise_frame_counts'].append(class_wise_frame_count)

        # Valence / arousal / stress
        valence_list, arousal_list, stress_list = va_predict(valence_arousal_model, val_ar_feat_model, faces, list(em_tensors))
        session_data[uid]['valence'].append(valence_list)
        session_data[uid]['arousal'].append(arousal_list)
        session_data[uid]['stress'].append(stress_list)

        # Speech emotion recognition
        log(f"Extracting speech features for question - {count}")
        emotions, major_emotion, word = speech_predict(audio_path, speech_model, valence_dict_path, arousal_dict_path, dominance_dict_path)
        session_data[uid]['speech_emotions'].append(emotions)
        session_data[uid]['word_weights_list'].append(word['word_weights'])
        # Guard against missing speech statistics (e.g. silent recording).
        session_data[uid]['speech_data'].append([
            float(word['average_pause_length'] if word and word['average_pause_length'] else 0),
            float(word['articulation_rate'] if word and word['articulation_rate'] else 0),
            float(word['speaking_rate'] if word and word['speaking_rate'] else 0),
        ])
        log(f"Generating the metadata for question - {count}")

        # Per-question metadata
        meta_data = {}
        # BUGFIX: was a bare try/except hiding a ZeroDivisionError on an
        # empty blink list — test explicitly instead.
        avg_blink_duration = float(sum(blink_durations) / len(blink_durations)) if blink_durations else 0
        meta_data['vcount'] = count
        meta_data['eye_emotion_recognition'] = {
            "blink_durations": blink_durations,
            "avg_blink_duration": avg_blink_duration,
            "total_blinks": total_blinks,
            "duration": video_clip.duration
        }
        meta_data['facial_emotion_recognition'] = {
            "class_wise_frame_count": class_wise_frame_count,
        }
        # BUGFIX: reuse the already-guarded numeric values; calling float()
        # directly on possibly-None dict fields raised TypeError.
        pause_length, articulation_rate, speaking_rate = session_data[uid]['speech_data'][-1]
        meta_data['speech_emotion_recognition'] = {
            'major_emotion': str(major_emotion),
            'pause_length': pause_length,
            'articulation_rate': articulation_rate,
            'speaking_rate': speaking_rate,
            'word_weights': word['word_weights']
        }

        # Clean up temporary audio artifacts.
        for file_path in (audio_path, 'segment.wav'):
            if os.path.exists(file_path):
                os.remove(file_path)
                print(f"{file_path} deleted")

        print("Individual: ", meta_data)
        if not final:
            print("Not final Executing")
            log(f"Saving analytics for question - {count}")
            print("Sent analytics")
            # Marker file telling the final invocation this question is done.
            dummy_file_path = os.path.join(output_dir, f'{count}.json')
            print("Writing dummy file: ", dummy_file_path)
            with open(dummy_file_path, 'w') as dummy_file:
                json.dump({"status": "completed"}, dummy_file)
            return

        # ---- Final question: combine all per-question results ----
        log(f"Processing gathered data for final output")

        # Questions may finish out of order; sort every per-question list
        # by its question index.
        vcount = session_data[uid]['vcount']
        sorted_indices = sorted(range(len(vcount)), key=lambda i: vcount[i])
        for key in session_data[uid]:
            # Only sort lists that are the same length as vcount
            if len(session_data[uid][key]) == len(vcount):
                session_data[uid][key] = [session_data[uid][key][i] for i in sorted_indices]

        videos = len(session_data[uid]['vcount'])

        # Combined speech timeline (one emotion per ~3 s segment).
        combined_speech = []
        combined_valence = []
        combined_arousal = []
        combined_stress = []
        combined_fer = []
        combined_eye = []
        combined_blinks = []
        vid_index = []
        for i in range(videos):
            for j in range(len(session_data[uid]['speech_emotions'][i])):
                vid_index.append(i + 1)
            combined_speech += session_data[uid]['speech_emotions'][i]
        timestamps = [i * 3 for i in range(len(combined_speech))]
        df = pd.DataFrame({
            'timestamps': timestamps,
            'video_index': vid_index,
            'speech_emotion': combined_speech
        })
        df.to_csv(os.path.join(output_dir, 'combined_speech.csv'), index=False)

        # Per-question plots plus frame-level combined series.
        vid_index = []
        for i in range(videos):
            timestamps = [j / 30 for j in range(len(session_data[uid]['valence'][i]))]
            for j in range(len(timestamps)):
                vid_index.append(i + 1)
            # BUGFIX: this statement was truncated mid-f-string in the
            # previous revision; the per-question folder is named after the
            # question index.
            folder_path = os.path.join(output_dir, f"{session_data[uid]['vcount'][i]}")
            os.makedirs(folder_path, exist_ok=True)
            plot_graph(timestamps, session_data[uid]['valence'][i], 'valence', os.path.join(folder_path, 'valence.png'))
            plot_graph(timestamps, session_data[uid]['arousal'][i], 'arousal', os.path.join(folder_path, 'arousal.png'))
            plot_graph(timestamps, session_data[uid]['stress'][i], 'stress', os.path.join(folder_path, 'stress.png'))
            combined_arousal += session_data[uid]['arousal'][i]
            combined_valence += session_data[uid]['valence'][i]
            combined_stress += session_data[uid]['stress'][i]
            combined_fer += session_data[uid]['fer'][i]
            combined_blinks += session_data[uid]['blinks'][i]
            # Eye counters restart per video; offset this video's values by
            # the running maximum so they accumulate across the session.
            # BUGFIX: was a bare try/except around max() of a possibly empty
            # list — use max(..., default=0) instead.
            max_value = max((x for x in combined_eye if isinstance(x, (int, float))), default=0)
            session_data[uid]['eye'][i] = [x + max_value if isinstance(x, (int, float)) else x for x in session_data[uid]['eye'][i]]
            combined_eye += session_data[uid]['eye'][i]

        timestamps = [i / fps for i in range(len(combined_arousal))]
        plot_graph(timestamps, combined_valence, 'valence', os.path.join(output_dir, 'valence.png'))
        plot_graph(timestamps, combined_arousal, 'arousal', os.path.join(output_dir, 'arousal.png'))
        plot_graph(timestamps, combined_stress, 'stress', os.path.join(output_dir, 'stress.png'))
        print(len(timestamps), len(vid_index), len(combined_fer), len(combined_valence), len(combined_arousal), len(combined_stress), len(combined_eye))
        df = pd.DataFrame({
            'timestamps': timestamps,
            'video_index': vid_index,  # Add a column for video index
            'fer': combined_fer,
            'valence': combined_valence,
            'arousal': combined_arousal,
            'stress': combined_stress,
            'eye': combined_eye,
        })
        df.to_csv(os.path.join(output_dir, 'combined_data.csv'), index=False)

        # Combined metadata
        comb_meta_data = {}
        avg_blink_duration = float(sum(combined_blinks) / len(combined_blinks)) if combined_blinks else 0
        # After offsetting, the session-wide blink total is the maximum
        # counter value.  BUGFIX: max() on an empty sequence raised
        # ValueError — default=0.
        total_blinks = max((x for x in combined_eye if isinstance(x, (int, float))), default=0)
        comb_meta_data['eye_emotion_recognition'] = {
            "avg_blink_duration": avg_blink_duration,
            "total_blinks": total_blinks,
        }

        # Sum the per-question class-wise frame counts.
        result = {}
        for d in session_data[uid]['class_wise_frame_counts']:
            for key, value in d.items():
                result[key] = result.get(key, 0) + value
        comb_meta_data['facial_emotion_recognition'] = {
            "class_wise_frame_count": result,
        }

        # Merge word-weight dictionaries from every question.
        combined_weights = Counter()
        for word_weight in session_data[uid]['word_weights_list']:
            combined_weights.update(word_weight)
        combined_weights_dict = dict(combined_weights)
        print(combined_weights_dict)
        comb_meta_data['speech_emotion_recognition'] = {
            'major_emotion': str(major_emotion),
            'pause_length': statistics.mean([row[0] for row in session_data[uid]['speech_data']]),
            'articulation_rate': statistics.mean([row[1] for row in session_data[uid]['speech_data']]),
            'speaking_rate': statistics.mean([row[2] for row in session_data[uid]['speech_data']]),
            'word_weights': combined_weights_dict
        }
        with open(os.path.join(output_dir, 'combined.json'), 'w') as json_file:
            json.dump(comb_meta_data, json_file)
        log(f"Saving analytics for final output")
    except Exception as e:
        print("Error analyzing video...: ", e)
        error_trace = traceback.format_exc()
        print("Error Trace: ", error_trace)
        log(f"Error analyzing video for question - {count}")
        # BUGFIX: previously removed the whole 'output' tree, wiping every
        # session's results (and raised NameError if the failure happened
        # before output_dir was assigned).  Delete only this session's
        # directory, and only if it exists.
        if output_dir and os.path.exists(output_dir):
            shutil.rmtree(output_dir)
            print(f"Deleted output directory: {output_dir}")
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
# st=time.time()
|
| 318 |
-
# # analyze_live_video(video_path, uid, user_id, count, final, log)
|
| 319 |
-
# analyze_live_video('videos/s2.webm', 1,1,1,False,print)
|
| 320 |
-
# analyze_live_video('videos/a4.webm', 1,1,2,True,print)
|
| 321 |
-
|
| 322 |
-
# analyze_live_video('videos/s2.webm', 1,1,2,True,print)
|
| 323 |
# print("time taken - ",time.time()-st)
|
|
|
|
| 1 |
+
import warnings
|
| 2 |
+
from functions.models import models_dict
|
| 3 |
+
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
|
| 4 |
+
import os
|
| 5 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
|
| 6 |
+
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
|
| 7 |
+
import logging
|
| 8 |
+
logging.getLogger('absl').setLevel(logging.ERROR)
|
| 9 |
+
from moviepy.editor import VideoFileClip
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from tqdm import tqdm
|
| 12 |
+
import time
|
| 13 |
+
import json
|
| 14 |
+
import cv2
|
| 15 |
+
import dlib
|
| 16 |
+
from collections import Counter
|
| 17 |
+
import statistics
|
| 18 |
+
import shutil
|
| 19 |
+
import asyncio
|
| 20 |
+
import traceback
|
| 21 |
+
|
| 22 |
+
from functions.valence_arousal import va_predict
|
| 23 |
+
from functions.speech import speech_predict
|
| 24 |
+
from functions.eye_track import Facetrack, eye_track_predict
|
| 25 |
+
from functions.fer import extract_face,fer_predict,plot_graph,filter
|
| 26 |
+
# from app.utils.session import send_analytics, send_individual_analytics_files, send_combined_analytics_files, send_error
|
| 27 |
+
# from app.utils.socket import ConnectionManager
|
| 28 |
+
from typing import Callable
|
| 29 |
+
# Per-session analytics accumulator, keyed by session uid.
session_data = {}

# Face detector and landmark predictor.
dnn_net = models_dict['face'][0]
predictor = models_dict['face'][1]

# Speech-emotion model plus the VAD (valence/arousal/dominance) lexicon paths.
speech_model = models_dict['speech']
valence_dict_path = models_dict['vad'][0]
arousal_dict_path = models_dict['vad'][1]
dominance_dict_path = models_dict['vad'][2]

# Valence/arousal regressor, its feature extractor, and the FER classifier.
valence_arousal_model = models_dict['valence_fer'][1]
val_ar_feat_model = models_dict['valence_fer'][0]
fer_model = models_dict['fer']
|
| 39 |
+
|
| 40 |
+
def analyze_live_video(video_path: str, uid: str, user_id: str, count: int, final: bool, log: Callable[[str], None]):
    """Analyze one recorded answer video and accumulate per-session analytics.

    Runs face extraction, eye tracking, facial-expression recognition,
    valence/arousal/stress estimation and speech-emotion analysis on the
    given video, storing per-question results in the module-level
    ``session_data`` dict keyed by ``uid``.  For non-final questions a
    ``<count>.json`` marker file is written so the final invocation can wait
    for all earlier questions to finish; on the final question the gathered
    results are sorted, combined, plotted and written under ``output/<uid>/``.

    Args:
        video_path: Path to the recorded video file (anything moviepy reads).
        uid: Session identifier; also names the output sub-directory.
        user_id: Id of the user being analyzed (currently only logged).
        count: 1-based question index of this video within the session.
        final: True when this is the last question of the session.
        log: Callback used to report progress messages to the caller.
    """
    output_dir = None  # assigned early so the except-block can reference it safely
    try:
        # Per-session accumulator shared across calls for the same uid.
        global session_data
        if uid not in session_data:
            session_data[uid] = {
                "vcount": [],
                "duration": [],

                "eye": [],

                "fer": [],
                "valence": [],
                "arousal": [],
                "stress": [],

                "blinks": [],
                "class_wise_frame_counts": [],

                "speech_emotions": [],
                "speech_data": [],
                "word_weights_list": [],
            }
        print(f"UID: {uid}, User ID: {user_id}, Count: {count}, Final: {final}, Video: {video_path}")
        log(f"Analyzing video for question - {count}")

        output_dir = os.path.join('output', str(uid))
        print(output_dir)
        os.makedirs(output_dir, exist_ok=True)

        # The final call must not combine results before every earlier
        # question has finished and dropped its <i>.json marker file.
        if final and count > 1:
            for i in range(1, count):
                previous_file_name = os.path.join(output_dir, f"{i}.json")
                print(previous_file_name)
                while not os.path.exists(previous_file_name):
                    time.sleep(1)

        video_clip = VideoFileClip(video_path)
        video_clip = video_clip.set_fps(30)
        print("Duration: ", video_clip.duration)
        session_data[uid]['vcount'].append(count)
        session_data[uid]['duration'].append(video_clip.duration)
        fps = video_clip.fps
        audio = video_clip.audio
        audio_path = os.path.join(output_dir, 'extracted_audio.wav')
        audio.write_audiofile(audio_path)
        video_frames = [frame for frame in video_clip.iter_frames()]

        # Face extraction
        print("extracting faces")
        faces = [extract_face(frame, dnn_net, predictor) for frame in tqdm(video_frames)]
        print(f'{len([face for face in faces if face is not None])} faces found.')

        # Eye tracking
        fc = Facetrack()
        log(f"Extracting eye features for question - {count}")
        eye_preds, blink_durations, total_blinks = eye_track_predict(fc, faces, fps)
        print(len(eye_preds))
        print("total_blinks- ", total_blinks)
        session_data[uid]['eye'].append(eye_preds)
        session_data[uid]['blinks'].append(blink_durations)

        # Facial expression recognition
        log(f"Extracting facial features for question - {count}")
        fer_emotions, class_wise_frame_count, em_tensors = fer_predict(faces, fps, fer_model)
        print("face emotions", len(fer_emotions))
        session_data[uid]['fer'].append(fer_emotions)
        session_data[uid]['class_wise_frame_counts'].append(class_wise_frame_count)

        # Valence / arousal / stress
        valence_list, arousal_list, stress_list = va_predict(valence_arousal_model, val_ar_feat_model, faces, list(em_tensors))
        session_data[uid]['valence'].append(valence_list)
        session_data[uid]['arousal'].append(arousal_list)
        session_data[uid]['stress'].append(stress_list)

        # Speech emotion recognition
        log(f"Extracting speech features for question - {count}")
        emotions, major_emotion, word = speech_predict(audio_path, speech_model, valence_dict_path, arousal_dict_path, dominance_dict_path)
        session_data[uid]['speech_emotions'].append(emotions)
        session_data[uid]['word_weights_list'].append(word['word_weights'])
        # Guard against missing speech statistics (e.g. silent recording).
        session_data[uid]['speech_data'].append([
            float(word['average_pause_length'] if word and word['average_pause_length'] else 0),
            float(word['articulation_rate'] if word and word['articulation_rate'] else 0),
            float(word['speaking_rate'] if word and word['speaking_rate'] else 0),
        ])
        log(f"Generating the metadata for question - {count}")

        # Per-question metadata
        meta_data = {}
        # BUGFIX: was a bare try/except hiding a ZeroDivisionError on an
        # empty blink list — test explicitly instead.
        avg_blink_duration = float(sum(blink_durations) / len(blink_durations)) if blink_durations else 0
        meta_data['vcount'] = count
        meta_data['eye_emotion_recognition'] = {
            "blink_durations": blink_durations,
            "avg_blink_duration": avg_blink_duration,
            "total_blinks": total_blinks,
            "duration": video_clip.duration
        }
        meta_data['facial_emotion_recognition'] = {
            "class_wise_frame_count": class_wise_frame_count,
        }
        # BUGFIX: reuse the already-guarded numeric values; calling float()
        # directly on possibly-None dict fields raised TypeError.
        pause_length, articulation_rate, speaking_rate = session_data[uid]['speech_data'][-1]
        meta_data['speech_emotion_recognition'] = {
            'major_emotion': str(major_emotion),
            'pause_length': pause_length,
            'articulation_rate': articulation_rate,
            'speaking_rate': speaking_rate,
            'word_weights': word['word_weights']
        }

        # Clean up temporary audio artifacts.
        for file_path in (audio_path, 'segment.wav'):
            if os.path.exists(file_path):
                os.remove(file_path)
                print(f"{file_path} deleted")

        print("Individual: ", meta_data)
        if not final:
            print("Not final Executing")
            log(f"Saving analytics for question - {count}")
            print("Sent analytics")
            # Marker file telling the final invocation this question is done.
            dummy_file_path = os.path.join(output_dir, f'{count}.json')
            print("Writing dummy file: ", dummy_file_path)
            with open(dummy_file_path, 'w') as dummy_file:
                json.dump({"status": "completed"}, dummy_file)
            return

        # ---- Final question: combine all per-question results ----
        log(f"Processing gathered data for final output")

        # Questions may finish out of order; sort every per-question list
        # by its question index.
        vcount = session_data[uid]['vcount']
        sorted_indices = sorted(range(len(vcount)), key=lambda i: vcount[i])
        for key in session_data[uid]:
            # Only sort lists that are the same length as vcount
            if len(session_data[uid][key]) == len(vcount):
                session_data[uid][key] = [session_data[uid][key][i] for i in sorted_indices]

        videos = len(session_data[uid]['vcount'])

        # Combined speech timeline (one emotion per ~3 s segment).
        combined_speech = []
        combined_valence = []
        combined_arousal = []
        combined_stress = []
        combined_fer = []
        combined_eye = []
        combined_blinks = []
        vid_index = []
        for i in range(videos):
            for j in range(len(session_data[uid]['speech_emotions'][i])):
                vid_index.append(i + 1)
            combined_speech += session_data[uid]['speech_emotions'][i]
        timestamps = [i * 3 for i in range(len(combined_speech))]
        df = pd.DataFrame({
            'timestamps': timestamps,
            'video_index': vid_index,
            'speech_emotion': combined_speech
        })
        df.to_csv(os.path.join(output_dir, 'combined_speech.csv'), index=False)

        # Per-question plots plus frame-level combined series.
        vid_index = []
        for i in range(videos):
            timestamps = [j / 30 for j in range(len(session_data[uid]['valence'][i]))]
            for j in range(len(timestamps)):
                vid_index.append(i + 1)
            # Per-question folder named after the question index.
            folder_path = os.path.join(output_dir, f"{session_data[uid]['vcount'][i]}")
            os.makedirs(folder_path, exist_ok=True)
            plot_graph(timestamps, session_data[uid]['valence'][i], 'valence', os.path.join(folder_path, 'valence.png'))
            plot_graph(timestamps, session_data[uid]['arousal'][i], 'arousal', os.path.join(folder_path, 'arousal.png'))
            plot_graph(timestamps, session_data[uid]['stress'][i], 'stress', os.path.join(folder_path, 'stress.png'))
            combined_arousal += session_data[uid]['arousal'][i]
            combined_valence += session_data[uid]['valence'][i]
            combined_stress += session_data[uid]['stress'][i]
            combined_fer += session_data[uid]['fer'][i]
            combined_blinks += session_data[uid]['blinks'][i]
            # Eye counters restart per video; offset this video's values by
            # the running maximum so they accumulate across the session.
            # BUGFIX: was a bare try/except around max() of a possibly empty
            # list — use max(..., default=0) instead.
            max_value = max((x for x in combined_eye if isinstance(x, (int, float))), default=0)
            session_data[uid]['eye'][i] = [x + max_value if isinstance(x, (int, float)) else x for x in session_data[uid]['eye'][i]]
            combined_eye += session_data[uid]['eye'][i]

        timestamps = [i / fps for i in range(len(combined_arousal))]
        plot_graph(timestamps, combined_valence, 'valence', os.path.join(output_dir, 'valence.png'))
        plot_graph(timestamps, combined_arousal, 'arousal', os.path.join(output_dir, 'arousal.png'))
        plot_graph(timestamps, combined_stress, 'stress', os.path.join(output_dir, 'stress.png'))
        print(len(timestamps), len(vid_index), len(combined_fer), len(combined_valence), len(combined_arousal), len(combined_stress), len(combined_eye))
        df = pd.DataFrame({
            'timestamps': timestamps,
            'video_index': vid_index,  # Add a column for video index
            'fer': combined_fer,
            'valence': combined_valence,
            'arousal': combined_arousal,
            'stress': combined_stress,
            'eye': combined_eye,
        })
        df.to_csv(os.path.join(output_dir, 'combined_data.csv'), index=False)

        # Combined metadata
        comb_meta_data = {}
        avg_blink_duration = float(sum(combined_blinks) / len(combined_blinks)) if combined_blinks else 0
        # After offsetting, the session-wide blink total is the maximum
        # counter value.  BUGFIX: max() on an empty sequence raised
        # ValueError — default=0.
        total_blinks = max((x for x in combined_eye if isinstance(x, (int, float))), default=0)
        comb_meta_data['eye_emotion_recognition'] = {
            "avg_blink_duration": avg_blink_duration,
            "total_blinks": total_blinks,
        }

        # Sum the per-question class-wise frame counts.
        result = {}
        for d in session_data[uid]['class_wise_frame_counts']:
            for key, value in d.items():
                result[key] = result.get(key, 0) + value
        comb_meta_data['facial_emotion_recognition'] = {
            "class_wise_frame_count": result,
        }

        # Merge word-weight dictionaries from every question.
        combined_weights = Counter()
        for word_weight in session_data[uid]['word_weights_list']:
            combined_weights.update(word_weight)
        combined_weights_dict = dict(combined_weights)
        print(combined_weights_dict)
        comb_meta_data['speech_emotion_recognition'] = {
            'major_emotion': str(major_emotion),
            'pause_length': statistics.mean([row[0] for row in session_data[uid]['speech_data']]),
            'articulation_rate': statistics.mean([row[1] for row in session_data[uid]['speech_data']]),
            'speaking_rate': statistics.mean([row[2] for row in session_data[uid]['speech_data']]),
            'word_weights': combined_weights_dict
        }
        with open(os.path.join(output_dir, 'combined.json'), 'w') as json_file:
            json.dump(comb_meta_data, json_file)
        log(f"Saving analytics for final output")
    except Exception as e:
        print("Error analyzing video...: ", e)
        error_trace = traceback.format_exc()
        print("Error Trace: ", error_trace)
        log(f"Error analyzing video for question - {count}")
        # BUGFIX: previously removed the whole 'output' tree, wiping every
        # session's results (and raised NameError if the failure happened
        # before output_dir was assigned).  Delete only this session's
        # directory, and only if it exists.
        if output_dir and os.path.exists(output_dir):
            shutil.rmtree(output_dir)
            print(f"Deleted output directory: {output_dir}")
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
# st=time.time()
|
| 318 |
+
# # analyze_live_video(video_path, uid, user_id, count, final, log)
|
| 319 |
+
# analyze_live_video('videos/s2.webm', 1,1,1,False,print)
|
| 320 |
+
# analyze_live_video('videos/a4.webm', 1,1,2,True,print)
|
| 321 |
+
|
| 322 |
+
# analyze_live_video('videos/s2.webm', 1,1,2,True,print)
|
| 323 |
# print("time taken - ",time.time()-st)
|