Commit ·
8ae78b0
1
Parent(s): cf407fe
Add backend application and Dockerfile
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +3 -0
- .gitignore +157 -0
- Dockerfile +77 -0
- README.md +5 -4
- behavior_backend/.dockerignore +24 -0
- behavior_backend/.env.exemple +13 -0
- behavior_backend/README.md +204 -0
- behavior_backend/__init__.py +0 -0
- behavior_backend/app.db +3 -0
- behavior_backend/app/__init__.py +2 -0
- behavior_backend/app/api/__init__.py +0 -0
- behavior_backend/app/api/routes/__init__.py +6 -0
- behavior_backend/app/api/routes/auth.py +119 -0
- behavior_backend/app/api/routes/health.py +27 -0
- behavior_backend/app/api/routes/processing.py +230 -0
- behavior_backend/app/api/routes/users.py +73 -0
- behavior_backend/app/api/routes/videos.py +454 -0
- behavior_backend/app/core/README_AUTH.md +93 -0
- behavior_backend/app/core/__init__.py +0 -0
- behavior_backend/app/core/config.py +57 -0
- behavior_backend/app/core/exceptions.py +55 -0
- behavior_backend/app/db/__init__.py +0 -0
- behavior_backend/app/db/base.py +28 -0
- behavior_backend/app/db/models.py +47 -0
- behavior_backend/app/db/repositories/__init__.py +0 -0
- behavior_backend/app/db/repositories/results.py +110 -0
- behavior_backend/app/db/repositories/video.py +78 -0
- behavior_backend/app/models/__init__.py +0 -0
- behavior_backend/app/models/processing.py +44 -0
- behavior_backend/app/models/token.py +13 -0
- behavior_backend/app/models/user.py +37 -0
- behavior_backend/app/models/video.py +38 -0
- behavior_backend/app/services/__init__.py +0 -0
- behavior_backend/app/services/processing/__init__.py +0 -0
- behavior_backend/app/services/processing/ai_analysis.py +850 -0
- behavior_backend/app/services/processing/ai_face_analyzer.py +299 -0
- behavior_backend/app/services/processing/body_language_analyzer.py +1100 -0
- behavior_backend/app/services/processing/emotion_analyzer.py +1733 -0
- behavior_backend/app/services/processing/eye_contact_analyzer.py +1739 -0
- behavior_backend/app/services/processing/processing_service.py +293 -0
- behavior_backend/app/services/processing/speech_service.py +530 -0
- behavior_backend/app/services/processing/temp/video_processor.py +174 -0
- behavior_backend/app/services/processing/video_processor.py +644 -0
- behavior_backend/app/services/video_service.py +262 -0
- behavior_backend/app/utils/__init__.py +0 -0
- behavior_backend/app/utils/auth.py +195 -0
- behavior_backend/app/utils/data_utils.py +268 -0
- behavior_backend/app/utils/device_utils.py +161 -0
- behavior_backend/app/utils/file_utils.py +49 -0
- behavior_backend/app/utils/logging_utils.py +256 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
*.db filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
pip-wheel-metadata/
|
| 24 |
+
share/python-wheels/
|
| 25 |
+
*.egg-info/
|
| 26 |
+
.installed.cfg
|
| 27 |
+
*.egg
|
| 28 |
+
MANIFEST
|
| 29 |
+
|
| 30 |
+
# PyInstaller
|
| 31 |
+
# Usually these files are generated by PyInstaller, if you are using it.
|
| 32 |
+
# *.spec
|
| 33 |
+
|
| 34 |
+
# Installer logs
|
| 35 |
+
pip-log.txt
|
| 36 |
+
pip-delete-this-directory.txt
|
| 37 |
+
|
| 38 |
+
# Unit test / coverage reports
|
| 39 |
+
htmlcov/
|
| 40 |
+
.tox/
|
| 41 |
+
.nox/
|
| 42 |
+
.coverage
|
| 43 |
+
.coverage.*
|
| 44 |
+
.cache
|
| 45 |
+
nosetests.xml
|
| 46 |
+
coverage.xml
|
| 47 |
+
*.cover
|
| 48 |
+
*.py,cover
|
| 49 |
+
.hypothesis/
|
| 50 |
+
.pytest_cache/
|
| 51 |
+
|
| 52 |
+
# Translations
|
| 53 |
+
*.mo
|
| 54 |
+
*.pot
|
| 55 |
+
|
| 56 |
+
# Django stuff:
|
| 57 |
+
*.log
|
| 58 |
+
local_settings.py
|
| 59 |
+
db.sqlite3
|
| 60 |
+
db.sqlite3-journal
|
| 61 |
+
|
| 62 |
+
# Flask stuff:
|
| 63 |
+
instance/
|
| 64 |
+
.webassets-cache
|
| 65 |
+
|
| 66 |
+
# Scrapy stuff:
|
| 67 |
+
.scrapy
|
| 68 |
+
|
| 69 |
+
# Sphinx documentation
|
| 70 |
+
docs/_build/
|
| 71 |
+
|
| 72 |
+
# Jupyter Notebook
|
| 73 |
+
.ipynb_checkpoints
|
| 74 |
+
|
| 75 |
+
# IPython
|
| 76 |
+
profile_default/
|
| 77 |
+
ipython_config.py
|
| 78 |
+
|
| 79 |
+
# pyenv
|
| 80 |
+
.python-version
|
| 81 |
+
|
| 82 |
+
# pipenv
|
| 83 |
+
# According to recommendations, Pipfile.lock should NOT be ignored
|
| 84 |
+
# Pipfile
|
| 85 |
+
|
| 86 |
+
# poetry
|
| 87 |
+
# poetry.lock
|
| 88 |
+
|
| 89 |
+
# PEP 582; __pypackages__ directory
|
| 90 |
+
__pypackages__/
|
| 91 |
+
|
| 92 |
+
# Celery stuff
|
| 93 |
+
celerybeat-schedule
|
| 94 |
+
celerybeat.pid
|
| 95 |
+
|
| 96 |
+
# SageMath parsed files
|
| 97 |
+
*.sage.py
|
| 98 |
+
|
| 99 |
+
# Environments
|
| 100 |
+
.env
|
| 101 |
+
.venv
|
| 102 |
+
env/
|
| 103 |
+
venv/
|
| 104 |
+
ENV/
|
| 105 |
+
env.bak/
|
| 106 |
+
venv.bak/
|
| 107 |
+
|
| 108 |
+
# Spyder project settings
|
| 109 |
+
.spyderproject
|
| 110 |
+
.spyproject
|
| 111 |
+
|
| 112 |
+
# Rope project settings
|
| 113 |
+
.ropeproject
|
| 114 |
+
|
| 115 |
+
# mkdocs documentation
|
| 116 |
+
/site
|
| 117 |
+
|
| 118 |
+
# mypy
|
| 119 |
+
.mypy_cache/
|
| 120 |
+
.dmypy.json
|
| 121 |
+
dmypy.json
|
| 122 |
+
|
| 123 |
+
# Pyre type checker
|
| 124 |
+
.pyre/
|
| 125 |
+
|
| 126 |
+
# IDEs and editors
|
| 127 |
+
.idea/
|
| 128 |
+
.vscode/
|
| 129 |
+
*.swp
|
| 130 |
+
*~
|
| 131 |
+
*.sublime-project
|
| 132 |
+
*.sublime-workspace
|
| 133 |
+
|
| 134 |
+
# OS-generated files
|
| 135 |
+
.DS_Store
|
| 136 |
+
.DS_Store?
|
| 137 |
+
._*
|
| 138 |
+
.Spotlight-V100
|
| 139 |
+
.Trashes
|
| 140 |
+
ehthumbs.db
|
| 141 |
+
Thumbs.db
|
| 142 |
+
|
| 143 |
+
# Log files
|
| 144 |
+
logs/
|
| 145 |
+
*.log
|
| 146 |
+
|
| 147 |
+
# Uploads and results (similar to .dockerignore but good for git too if these are runtime)
|
| 148 |
+
# If you want to track the empty directories, you might add a .gitkeep file inside them
|
| 149 |
+
# and then list them here if the contents should always be ignored.
|
| 150 |
+
# For now, matching the .dockerignore behavior:
|
| 151 |
+
static/uploads/*
|
| 152 |
+
static/results/*
|
| 153 |
+
|
| 154 |
+
# Other
|
| 155 |
+
*.bk
|
| 156 |
+
*.bak
|
| 157 |
+
*.tmp
|
Dockerfile
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Stage 1: Builder (if you still want to use a virtual env, otherwise can simplify)
|
| 2 |
+
FROM python:3.11-slim as builder
|
| 3 |
+
|
| 4 |
+
ARG DEBIAN_FRONTEND=noninteractive
|
| 5 |
+
WORKDIR /opt/builder_app
|
| 6 |
+
|
| 7 |
+
# Install system dependencies only needed for building, if any
|
| 8 |
+
# For this setup, most deps are runtime, so we can simplify.
|
| 9 |
+
# If your pip install has complex build steps, keep build-essential etc.
|
| 10 |
+
RUN apt-get update && apt-get install -y \
|
| 11 |
+
build-essential \
|
| 12 |
+
# libpq-dev is needed to build psycopg2 if it's a direct dependency
|
| 13 |
+
# If psycopg2-binary is used, libpq-dev might not be needed at build time
|
| 14 |
+
# but libpq5 (runtime lib) will be needed in final stage.
|
| 15 |
+
# For simplicity, assuming pip handles it or it's binary.
|
| 16 |
+
# If build fails on psycopg2, add libpq-dev here.
|
| 17 |
+
python3-venv \
|
| 18 |
+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 19 |
+
|
| 20 |
+
COPY requirements.txt .
|
| 21 |
+
|
| 22 |
+
RUN python3 -m venv /opt/venv
|
| 23 |
+
ENV PATH="/opt/venv/bin:$PATH"
|
| 24 |
+
|
| 25 |
+
# Upgrade pip and install requirements
|
| 26 |
+
RUN pip install --no-cache-dir --upgrade pip && \
|
| 27 |
+
pip install --no-cache-dir -r requirements.txt
|
| 28 |
+
|
| 29 |
+
# Stage 2: Final image
|
| 30 |
+
FROM python:3.11-slim
|
| 31 |
+
|
| 32 |
+
ARG DEBIAN_FRONTEND=noninteractive
|
| 33 |
+
|
| 34 |
+
# Install runtime system dependencies
|
| 35 |
+
RUN apt-get update && apt-get install -y \
|
| 36 |
+
libgl1-mesa-glx \
|
| 37 |
+
libglib2.0-0 \
|
| 38 |
+
ffmpeg \
|
| 39 |
+
# postgresql-client # For running psql command, not strictly for app connection via libpq
|
| 40 |
+
libpq5 \ # Runtime library for psycopg2
|
| 41 |
+
curl \
|
| 42 |
+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 43 |
+
|
| 44 |
+
# Copy virtual environment from builder stage
|
| 45 |
+
COPY --from=builder /opt/venv /opt/venv
|
| 46 |
+
ENV PATH="/opt/venv/bin:$PATH"
|
| 47 |
+
|
| 48 |
+
# Create a non-root user 'appuser' (ID 1000 is common)
|
| 49 |
+
# and set up its home directory.
|
| 50 |
+
RUN useradd -m -u 1000 appuser
|
| 51 |
+
ENV HOME=/home/appuser
|
| 52 |
+
# PATH is already set to include venv
|
| 53 |
+
|
| 54 |
+
WORKDIR $HOME/app # Set workdir to user's home app subfolder
|
| 55 |
+
|
| 56 |
+
# Copy application code into the appuser's WORKDIR, and set ownership
|
| 57 |
+
# These files (requirements.txt, app/, main.py) should be in the root of your HF Space Git repo
|
| 58 |
+
COPY --chown=appuser:appuser requirements.txt .
|
| 59 |
+
COPY --chown=appuser:appuser app/ ./app/
|
| 60 |
+
COPY --chown=appuser:appuser main.py .
|
| 61 |
+
|
| 62 |
+
# Create directories your application might need, ensure appuser has write access
|
| 63 |
+
# If these are created by your app at runtime and are within $HOME/app, it should be fine.
|
| 64 |
+
# If they are absolute paths outside $HOME, you need to ensure appuser can write to them.
|
| 65 |
+
# Example: RUN mkdir -p /app/static/uploads /app/static/results && chown -R appuser:appuser /app/static
|
| 66 |
+
# For now, assuming your app creates these within its runtime context if needed.
|
| 67 |
+
# If UPLOAD_DIR is /app/static/uploads, and WORKDIR is /home/appuser/app,
|
| 68 |
+
# your app's relative path for uploads might be `static/uploads`.
|
| 69 |
+
|
| 70 |
+
# Switch to the non-root user
|
| 71 |
+
USER appuser
|
| 72 |
+
|
| 73 |
+
# Expose the port Hugging Face Spaces expects (default is 7860)
|
| 74 |
+
EXPOSE 7860
|
| 75 |
+
|
| 76 |
+
# Command to run the application on the correct port for Hugging Face
|
| 77 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Video Processing
|
| 3 |
+
emoji: 🦀
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: yellow
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
license: apache-2.0
|
| 9 |
---
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
behavior_backend/.dockerignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
*$py.class
|
| 4 |
+
*.so
|
| 5 |
+
.Python
|
| 6 |
+
venv/
|
| 7 |
+
.venv/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib64/
|
| 14 |
+
parts/
|
| 15 |
+
sdist/
|
| 16 |
+
var/
|
| 17 |
+
*.egg-info/
|
| 18 |
+
.installed.cfg
|
| 19 |
+
*.egg
|
| 20 |
+
*.log
|
| 21 |
+
logs/
|
| 22 |
+
static/uploads/*
|
| 23 |
+
static/results/*
|
| 24 |
+
.DS_Store
|
behavior_backend/.env.exemple
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Database configuration (using SQLite by default, but you can use another DB)
|
| 2 |
+
DATABASE_URL=sqlite:///app.db
|
| 3 |
+
|
| 4 |
+
# Security settings
|
| 5 |
+
SECRET_KEY=your-secret-key-here
|
| 6 |
+
API_KEY=your-api-key-here
|
| 7 |
+
|
| 8 |
+
# AI Service API Keys
|
| 9 |
+
OPENAI_API_KEY=your-openai-api-key
|
| 10 |
+
GROQ_API_KEY=your-groq-api-key
|
| 11 |
+
|
| 12 |
+
# CORS settings (for production, specify your frontend URL)
|
| 13 |
+
# CORS_ORIGINS=http://localhost:3000
|
behavior_backend/README.md
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EmotiVid API
|
| 2 |
+
|
| 3 |
+
A modular FastAPI backend for video behavior and emotion analysis.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
EmotiVid API is a powerful backend service that analyzes videos to detect emotions, facial expressions, body language, eye contact, and transcribe speech. It provides comprehensive analysis of the content using computer vision, machine learning, and natural language processing techniques.
|
| 8 |
+
|
| 9 |
+
## Features
|
| 10 |
+
|
| 11 |
+
- **Video Management**: Upload, list, and retrieve video metadata
|
| 12 |
+
- **Emotion Analysis**: Detect emotions in video frames using deep learning models
|
| 13 |
+
- **Eye Contact Analysis**: Measure eye contact consistency and engagement
|
| 14 |
+
- **Body Language Analysis**: Analyze posture, gestures, and non-verbal cues
|
| 15 |
+
- **Speech-to-Text**: Transcribe speech in videos using Whisper
|
| 16 |
+
- **AI Analysis**: Process results using OpenAI and Groq for deeper insights
|
| 17 |
+
- **Background Processing**: Process videos asynchronously with status updates
|
| 18 |
+
- **Annotated Video Generation**: Generate videos with behavior annotations
|
| 19 |
+
- **User Authentication**: Secure API with JWT authentication
|
| 20 |
+
- **API Key Authentication**: Alternative authentication method for direct integrations
|
| 21 |
+
|
| 22 |
+
## Project Structure
|
| 23 |
+
|
| 24 |
+
```
|
| 25 |
+
behavior_backend/
|
| 26 |
+
├── app/ # Application code
|
| 27 |
+
│ ├── api/ # API endpoints
|
| 28 |
+
│ │ ├── routes/ # Route definitions
|
| 29 |
+
│ ├── core/ # Core application code
|
| 30 |
+
│ │ ├── config.py # Configuration management
|
| 31 |
+
│ │ └── exceptions.py # Custom exceptions
|
| 32 |
+
│ ├── db/ # Database related code
|
| 33 |
+
│ │ ├── base.py # Database setup
|
| 34 |
+
│ │ ├── models.py # SQLAlchemy models
|
| 35 |
+
│ │ └── repositories/ # Database access layer
|
| 36 |
+
│ ├── models/ # Pydantic models for API
|
| 37 |
+
│ ├── services/ # Business logic
|
| 38 |
+
│ │ ├── video_service.py # Video management service
|
| 39 |
+
│ │ └── processing/ # Processing services
|
| 40 |
+
│ │ ├── video_processor.py # Main video processing pipeline
|
| 41 |
+
│ │ ├── emotion_analyzer.py # Facial emotion analysis
|
| 42 |
+
│ │ ├── eye_contact_analyzer.py # Eye contact detection
|
| 43 |
+
│ │ ├── body_language_analyzer.py # Body language analysis
|
| 44 |
+
│ │ ├── speech_service.py # Speech transcription
|
| 45 |
+
│ │ └── ai_analysis.py # AI-powered insights
|
| 46 |
+
│ └── utils/ # Utility functions
|
| 47 |
+
├── static/ # Static files
|
| 48 |
+
│ ├── uploads/ # Upload directory
|
| 49 |
+
│ └── results/ # Results directory
|
| 50 |
+
├── annotated_videos/ # Processed videos with annotations
|
| 51 |
+
├── temp_face_frames/ # Temporary storage for processed frames
|
| 52 |
+
├── logs/ # Application logs
|
| 53 |
+
├── tests/ # Test directory
|
| 54 |
+
├── .env # Environment variables
|
| 55 |
+
├── main.py # Application entry point
|
| 56 |
+
├── requirements.txt # Production dependencies
|
| 57 |
+
├── requirements-dev.txt # Development dependencies
|
| 58 |
+
├── run.sh # Production server script
|
| 59 |
+
└── start_server.sh # Development server script
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
## Prerequisites
|
| 63 |
+
|
| 64 |
+
- Python 3.9+
|
| 65 |
+
- FFmpeg (for video processing)
|
| 66 |
+
- GPU support (optional, for faster processing)
|
| 67 |
+
|
| 68 |
+
## Installation
|
| 69 |
+
|
| 70 |
+
### Option 1: Using Python Virtual Environment
|
| 71 |
+
|
| 72 |
+
1. Create and activate a virtual environment:
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
# Windows
|
| 76 |
+
python -m venv venv
|
| 77 |
+
venv\Scripts\activate
|
| 78 |
+
|
| 79 |
+
# Linux/Mac
|
| 80 |
+
python -m venv venv
|
| 81 |
+
source venv/bin/activate
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
2. Install dependencies:
|
| 85 |
+
|
| 86 |
+
```bash
|
| 87 |
+
# For production
|
| 88 |
+
pip install -r requirements.txt
|
| 89 |
+
|
| 90 |
+
# For development
|
| 91 |
+
pip install -r requirements-dev.txt
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
3. Create a `.env` file with the following variables:
|
| 95 |
+
```
|
| 96 |
+
DATABASE_URL=sqlite:///app.db
|
| 97 |
+
OPENAI_API_KEY=your_openai_api_key
|
| 98 |
+
GROQ_API_KEY=your_groq_api_key
|
| 99 |
+
SECRET_KEY=your_secret_key
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## Running the Application
|
| 103 |
+
|
| 104 |
+
### Development Server
|
| 105 |
+
|
| 106 |
+
For development with hot-reload and debug features:
|
| 107 |
+
|
| 108 |
+
```bash
|
| 109 |
+
# Windows
|
| 110 |
+
venv\Scripts\activate
|
| 111 |
+
uvicorn main:app --reload
|
| 112 |
+
|
| 113 |
+
# Linux/Mac
|
| 114 |
+
source venv/bin/activate
|
| 115 |
+
./start_server.sh
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
This will start the development server with hot-reload enabled and make the API available at http://localhost:8000.
|
| 119 |
+
|
| 120 |
+
### Production Server
|
| 121 |
+
|
| 122 |
+
For production deployment:
|
| 123 |
+
|
| 124 |
+
```bash
|
| 125 |
+
# Windows
|
| 126 |
+
venv\Scripts\activate
|
| 127 |
+
uvicorn main:app --workers 4 --host 0.0.0.0 --port 8000
|
| 128 |
+
|
| 129 |
+
# Linux/Mac
|
| 130 |
+
source venv/bin/activate
|
| 131 |
+
./run.sh
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
## API Documentation
|
| 135 |
+
|
| 136 |
+
API documentation is available at:
|
| 137 |
+
|
| 138 |
+
- Swagger UI: http://localhost:8000/docs
|
| 139 |
+
- ReDoc: http://localhost:8000/redoc
|
| 140 |
+
|
| 141 |
+
### Key Endpoints
|
| 142 |
+
|
| 143 |
+
- `POST /api/v1/videos/upload`: Upload a video
|
| 144 |
+
- `GET /api/v1/videos`: List all videos
|
| 145 |
+
- `GET /api/v1/videos/{video_id}`: Get video metadata
|
| 146 |
+
- `POST /api/v1/processing/analyze/{video_id}`: Process a video
|
| 147 |
+
- `GET /api/v1/processing/status/{video_id}`: Get processing status
|
| 148 |
+
- `GET /api/v1/processing/results/{video_id}`: Get processing results
|
| 149 |
+
- `POST /api/v1/auth/login`: User login
|
| 150 |
+
- `POST /api/v1/auth/register`: User registration
|
| 151 |
+
- `GET /api/v1/users/me`: Get current user
|
| 152 |
+
- `POST /api/v1/videos/upload-and-process-direct`: Upload and process a video with API key authentication
|
| 153 |
+
|
| 154 |
+
## API Key Authentication
|
| 155 |
+
|
| 156 |
+
Some endpoints support API key authentication for direct integration with other systems. To use these endpoints:
|
| 157 |
+
|
| 158 |
+
1. Set the `API_KEY` environment variable or let it auto-generate
|
| 159 |
+
2. Include the API key in the `X-API-Key` header with your requests
|
| 160 |
+
3. Endpoints that support API key authentication are documented in the API docs
|
| 161 |
+
|
| 162 |
+
Example:
|
| 163 |
+
```bash
|
| 164 |
+
curl -X POST "http://localhost:8000/api/v1/videos/upload-and-process-direct" \
|
| 165 |
+
-H "X-API-Key: your-api-key" \
|
| 166 |
+
-H "accept: application/json" \
|
| 167 |
+
-H "Content-Type: multipart/form-data" \
|
| 168 |
+
-F "file=@video.mp4" \
|
| 169 |
+
-F "frame_rate=35" \
|
| 170 |
+
-F "backend=mediapipe"
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
## Testing
|
| 174 |
+
|
| 175 |
+
Run tests with pytest:
|
| 176 |
+
|
| 177 |
+
```bash
|
| 178 |
+
pytest
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
## Development Tools
|
| 182 |
+
|
| 183 |
+
The project includes several development tools:
|
| 184 |
+
|
| 185 |
+
- **Black**: Code formatting
|
| 186 |
+
- **isort**: Import sorting
|
| 187 |
+
- **flake8**: Code linting
|
| 188 |
+
- **mypy**: Type checking
|
| 189 |
+
- **pytest**: Testing framework
|
| 190 |
+
|
| 191 |
+
## Environment Variables
|
| 192 |
+
|
| 193 |
+
| Variable | Description | Default |
|
| 194 |
+
| -------------- | -------------------------------------- | ------------------------- |
|
| 195 |
+
| DATABASE_URL | SQLite or PostgreSQL connection string | sqlite:///app.db |
|
| 196 |
+
| SECRET_KEY | JWT secret key | None |
|
| 197 |
+
| API_KEY | API key for direct endpoints | Auto-generated |
|
| 198 |
+
| OPENAI_API_KEY | OpenAI API key for analysis | None |
|
| 199 |
+
| GROQ_API_KEY | Groq API key for analysis | None |
|
| 200 |
+
| CORS_ORIGINS | Allowed CORS origins | ["http://localhost:3000"] |
|
| 201 |
+
|
| 202 |
+
## License
|
| 203 |
+
|
| 204 |
+
This project is licensed under the MIT License.
|
behavior_backend/__init__.py
ADDED
|
File without changes
|
behavior_backend/app.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:363582c5479feabd14addd25318655ceb736f4186816d814b7d6d8ccd530765e
|
| 3 |
+
size 233472
|
behavior_backend/app/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is kept to make the app directory a package
|
| 2 |
+
# The FastAPI instance is now created in main.py
|
behavior_backend/app/api/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/api/routes/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Empty __init__.py file
|
| 2 |
+
|
| 3 |
+
# Import routers
|
| 4 |
+
from app.api.routes.videos import router as videos_router
|
| 5 |
+
from app.api.routes.processing import router as processing_router
|
| 6 |
+
from app.api.routes.users import router as users_router
|
behavior_backend/app/api/routes/auth.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException, status
|
| 2 |
+
from fastapi.security import OAuth2PasswordRequestForm
|
| 3 |
+
from sqlalchemy.orm import Session
|
| 4 |
+
from typing import Annotated
|
| 5 |
+
from datetime import timedelta
|
| 6 |
+
|
| 7 |
+
from app.models.user import UserLogin
|
| 8 |
+
from app.models.token import Token
|
| 9 |
+
from app.db.base import get_db
|
| 10 |
+
from app.db.models import User
|
| 11 |
+
from app.utils.security import verify_password
|
| 12 |
+
from app.utils.auth import create_access_token, get_current_user
|
| 13 |
+
from app.core.config import settings
|
| 14 |
+
|
| 15 |
+
router = APIRouter(
|
| 16 |
+
prefix="/auth",
|
| 17 |
+
tags=["Authentication"]
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
@router.post("/login", response_model=Token)
|
| 21 |
+
async def login(
|
| 22 |
+
form_data: Annotated[OAuth2PasswordRequestForm, Depends()],
|
| 23 |
+
db: Annotated[Session, Depends(get_db)]
|
| 24 |
+
):
|
| 25 |
+
"""
|
| 26 |
+
OAuth2 compatible token login, get an access token for future requests.
|
| 27 |
+
"""
|
| 28 |
+
# Find the user by email
|
| 29 |
+
user = db.query(User).filter(User.email == form_data.username).first()
|
| 30 |
+
|
| 31 |
+
if not user:
|
| 32 |
+
raise HTTPException(
|
| 33 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 34 |
+
detail="Incorrect email or password",
|
| 35 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# Verify the password
|
| 39 |
+
if not verify_password(form_data.password, user.hashed_password):
|
| 40 |
+
raise HTTPException(
|
| 41 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 42 |
+
detail="Incorrect email or password",
|
| 43 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
# Create access token
|
| 47 |
+
access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
|
| 48 |
+
access_token = create_access_token(
|
| 49 |
+
data={"sub": user.id},
|
| 50 |
+
expires_delta=access_token_expires
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
# Return token
|
| 54 |
+
return {"access_token": access_token, "token_type": "bearer"}
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@router.post("/login/email", response_model=Token)
|
| 58 |
+
async def login_with_email(
|
| 59 |
+
user_credentials: UserLogin,
|
| 60 |
+
db: Annotated[Session, Depends(get_db)]
|
| 61 |
+
):
|
| 62 |
+
"""
|
| 63 |
+
Login with email and password, get an access token for future requests.
|
| 64 |
+
"""
|
| 65 |
+
# Find the user by email
|
| 66 |
+
user = db.query(User).filter(User.email == user_credentials.email).first()
|
| 67 |
+
|
| 68 |
+
if not user:
|
| 69 |
+
raise HTTPException(
|
| 70 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 71 |
+
detail="Incorrect email or password",
|
| 72 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
# Verify the password
|
| 76 |
+
if not verify_password(user_credentials.password, user.hashed_password):
|
| 77 |
+
raise HTTPException(
|
| 78 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 79 |
+
detail="Incorrect email or password",
|
| 80 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
# Create access token
|
| 84 |
+
access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
|
| 85 |
+
access_token = create_access_token(
|
| 86 |
+
data={"sub": user.id},
|
| 87 |
+
expires_delta=access_token_expires
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
# Return token
|
| 91 |
+
return {"access_token": access_token, "token_type": "bearer"}
|
| 92 |
+
|
| 93 |
+
@router.post("/refresh", response_model=Token)
|
| 94 |
+
async def refresh_token(
|
| 95 |
+
current_user: Annotated[User, Depends(get_current_user)],
|
| 96 |
+
):
|
| 97 |
+
"""
|
| 98 |
+
Refresh the access token before it expires.
|
| 99 |
+
"""
|
| 100 |
+
try:
|
| 101 |
+
print(f"[refresh_token] Processing refresh request for user: {current_user.id}")
|
| 102 |
+
|
| 103 |
+
# Create a new access token with the current user's ID
|
| 104 |
+
access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
|
| 105 |
+
access_token = create_access_token(
|
| 106 |
+
data={"sub": current_user.id},
|
| 107 |
+
expires_delta=access_token_expires
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
print(f"[refresh_token] Successfully refreshed token for user: {current_user.id}")
|
| 111 |
+
|
| 112 |
+
# Return the new token
|
| 113 |
+
return {"access_token": access_token, "token_type": "bearer"}
|
| 114 |
+
except Exception as e:
|
| 115 |
+
print(f"[refresh_token] Error refreshing token for user {current_user.id}: {str(e)}")
|
| 116 |
+
raise HTTPException(
|
| 117 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 118 |
+
detail=f"Error refreshing token: {str(e)}",
|
| 119 |
+
)
|
behavior_backend/app/api/routes/health.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends
|
| 2 |
+
from sqlalchemy.orm import Session
|
| 3 |
+
from app.db.base import get_db
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
router = APIRouter(
|
| 9 |
+
prefix="/health",
|
| 10 |
+
tags=["health"],
|
| 11 |
+
responses={404: {"description": "Not found"}},
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
@router.get("")
|
| 15 |
+
async def health_check(db: Session = Depends(get_db)):
|
| 16 |
+
"""
|
| 17 |
+
Perform a health check of the application.
|
| 18 |
+
Checks database connectivity.
|
| 19 |
+
"""
|
| 20 |
+
try:
|
| 21 |
+
# Try to execute a simple query to check DB connection
|
| 22 |
+
db.execute("SELECT 1")
|
| 23 |
+
logger.info("Health check: Database connection successful.")
|
| 24 |
+
return {"status": "healthy", "database": "connected"}
|
| 25 |
+
except Exception as e:
|
| 26 |
+
logger.error(f"Health check: Database connection failed - {str(e)}")
|
| 27 |
+
return {"status": "unhealthy", "database": "disconnected", "error": str(e)}
|
behavior_backend/app/api/routes/processing.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, BackgroundTasks
|
| 2 |
+
from sqlalchemy.orm import Session
|
| 3 |
+
|
| 4 |
+
from app.db.base import get_db
|
| 5 |
+
from app.models.processing import ProcessingRequest, ProcessingStatus
|
| 6 |
+
from app.services.processing.processing_service import ProcessingService
|
| 7 |
+
from app.utils.logging_utils import setup_logger, log_success
|
| 8 |
+
from app.utils.auth import get_current_active_user, get_api_key_user
|
| 9 |
+
from app.db.models import User
|
| 10 |
+
|
| 11 |
+
"""
|
| 12 |
+
Video Processing API Routes
|
| 13 |
+
==========================
|
| 14 |
+
|
| 15 |
+
This module provides API endpoints for video processing operations, including:
|
| 16 |
+
- Initiating video processing
|
| 17 |
+
- Checking processing status
|
| 18 |
+
- Retrieving processing results
|
| 19 |
+
|
| 20 |
+
All endpoints are prefixed with '/processing' and include appropriate logging
|
| 21 |
+
with endpoint identification in square brackets.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
# Setup logger for this module
|
| 25 |
+
logger = setup_logger("processing_router")
|
| 26 |
+
|
| 27 |
+
router = APIRouter(
|
| 28 |
+
prefix="/processing",
|
| 29 |
+
tags=["processing"],
|
| 30 |
+
responses={404: {"description": "Not found"}},
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
@router.post("", response_model=ProcessingStatus)
|
| 34 |
+
async def process_video(
|
| 35 |
+
request: ProcessingRequest,
|
| 36 |
+
background_tasks: BackgroundTasks,
|
| 37 |
+
db: Session = Depends(get_db),
|
| 38 |
+
current_user: User = Depends(get_current_active_user)
|
| 39 |
+
):
|
| 40 |
+
"""
|
| 41 |
+
Initiate processing of a video.
|
| 42 |
+
|
| 43 |
+
This endpoint accepts a video ID and initiates the processing pipeline
|
| 44 |
+
as a background task. It returns a processing status object with the
|
| 45 |
+
video ID that can be used to check the status later.
|
| 46 |
+
|
| 47 |
+
Args:
|
| 48 |
+
request (ProcessingRequest): Request object containing the video ID and processing options
|
| 49 |
+
background_tasks (BackgroundTasks): FastAPI background tasks manager
|
| 50 |
+
db (Session): Database session dependency
|
| 51 |
+
current_user (User): Current active user dependency
|
| 52 |
+
|
| 53 |
+
Returns:
|
| 54 |
+
ProcessingStatus: Object containing the video ID and initial processing status
|
| 55 |
+
|
| 56 |
+
Example:
|
| 57 |
+
POST /processing
|
| 58 |
+
{
|
| 59 |
+
"video_id": "vid-12345",
|
| 60 |
+
"frame_rate": 5,
|
| 61 |
+
"language": "en",
|
| 62 |
+
"generate_annotated_video": true
|
| 63 |
+
}
|
| 64 |
+
"""
|
| 65 |
+
logger.info(f"[process_video] Received request to process video: {request.video_id}")
|
| 66 |
+
processing_service = ProcessingService(db)
|
| 67 |
+
result = await processing_service.process_video(request, background_tasks)
|
| 68 |
+
logger.info(f" : {result.video_id}")
|
| 69 |
+
return result
|
| 70 |
+
|
| 71 |
+
@router.get("/status/{video_id}", response_model=ProcessingStatus)
|
| 72 |
+
async def get_processing_status(
|
| 73 |
+
video_id: str,
|
| 74 |
+
db: Session = Depends(get_db),
|
| 75 |
+
current_user: User = Depends(get_current_active_user)
|
| 76 |
+
):
|
| 77 |
+
"""
|
| 78 |
+
Get the current processing status of a video.
|
| 79 |
+
|
| 80 |
+
This endpoint retrieves the current status of a video processing job
|
| 81 |
+
using the video ID returned from the process_video endpoint.
|
| 82 |
+
|
| 83 |
+
Args:
|
| 84 |
+
video_id (str): Unique identifier for the video processing job
|
| 85 |
+
db (Session): Database session dependency
|
| 86 |
+
current_user (User): Current active user dependency
|
| 87 |
+
|
| 88 |
+
Returns:
|
| 89 |
+
ProcessingStatus: Object containing the video ID and current processing status
|
| 90 |
+
|
| 91 |
+
Example:
|
| 92 |
+
GET /processing/status/vid-12345
|
| 93 |
+
"""
|
| 94 |
+
logger.info(f"[get_processing_status] Checking status for video ID: {video_id}")
|
| 95 |
+
processing_service = ProcessingService(db)
|
| 96 |
+
status = processing_service.get_processing_status(video_id)
|
| 97 |
+
logger.info(f"[get_processing_status] Status for video ID {video_id}: {status.status}")
|
| 98 |
+
return status
|
| 99 |
+
|
| 100 |
+
@router.get("/results/{video_id}")
|
| 101 |
+
async def get_processing_results(
|
| 102 |
+
video_id: str,
|
| 103 |
+
db: Session = Depends(get_db),
|
| 104 |
+
current_user: User = Depends(get_current_active_user)
|
| 105 |
+
):
|
| 106 |
+
"""
|
| 107 |
+
Get the results of a completed video processing job.
|
| 108 |
+
|
| 109 |
+
This endpoint retrieves the full results of a video processing job
|
| 110 |
+
after it has completed. It should only be called after the status
|
| 111 |
+
endpoint indicates that processing is complete.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
video_id (str): Unique identifier for the video processing job
|
| 115 |
+
db (Session): Database session dependency
|
| 116 |
+
current_user (User): Current active user dependency
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
dict: Processing results including behavior analytics data
|
| 120 |
+
|
| 121 |
+
Example:
|
| 122 |
+
GET /processing/results/vid-12345
|
| 123 |
+
"""
|
| 124 |
+
logger.info(f"[get_processing_results] Retrieving results for video ID: {video_id}")
|
| 125 |
+
processing_service = ProcessingService(db)
|
| 126 |
+
results = processing_service.get_processing_results(video_id)
|
| 127 |
+
log_success(logger, f"[get_processing_results] Successfully retrieved results for video ID: {video_id}")
|
| 128 |
+
return results
|
| 129 |
+
|
| 130 |
+
@router.post("/direct", response_model=ProcessingStatus)
|
| 131 |
+
async def process_video_direct(
|
| 132 |
+
request: ProcessingRequest,
|
| 133 |
+
background_tasks: BackgroundTasks,
|
| 134 |
+
db: Session = Depends(get_db),
|
| 135 |
+
api_key_valid: bool = Depends(get_api_key_user)
|
| 136 |
+
):
|
| 137 |
+
"""
|
| 138 |
+
Initiate processing of a video using API key authentication.
|
| 139 |
+
|
| 140 |
+
This endpoint accepts a video ID and initiates the processing pipeline
|
| 141 |
+
as a background task. It is secured with API key authentication.
|
| 142 |
+
|
| 143 |
+
Args:
|
| 144 |
+
request (ProcessingRequest): Request object containing the video ID and processing options
|
| 145 |
+
background_tasks (BackgroundTasks): FastAPI background tasks manager
|
| 146 |
+
db (Session): Database session dependency
|
| 147 |
+
api_key_valid (bool): API key validation dependency
|
| 148 |
+
|
| 149 |
+
Returns:
|
| 150 |
+
ProcessingStatus: Object containing the video ID and initial processing status
|
| 151 |
+
|
| 152 |
+
Example:
|
| 153 |
+
POST /processing/direct
|
| 154 |
+
X-API-Key: your-api-key
|
| 155 |
+
{
|
| 156 |
+
"video_id": "vid-12345",
|
| 157 |
+
"frame_rate": 5,
|
| 158 |
+
"language": "en",
|
| 159 |
+
"generate_annotated_video": true
|
| 160 |
+
}
|
| 161 |
+
"""
|
| 162 |
+
logger.info(f"[process_video_direct] Received request to process video: {request.video_id}")
|
| 163 |
+
processing_service = ProcessingService(db)
|
| 164 |
+
result = await processing_service.process_video(request, background_tasks)
|
| 165 |
+
logger.info(f"[process_video_direct] Started processing for video ID: {result.video_id}")
|
| 166 |
+
return result
|
| 167 |
+
|
| 168 |
+
@router.get("/direct/status/{video_id}", response_model=ProcessingStatus)
|
| 169 |
+
async def get_processing_status_direct(
|
| 170 |
+
video_id: str,
|
| 171 |
+
db: Session = Depends(get_db),
|
| 172 |
+
api_key_valid: bool = Depends(get_api_key_user)
|
| 173 |
+
):
|
| 174 |
+
"""
|
| 175 |
+
Get the current processing status of a video using API key authentication.
|
| 176 |
+
|
| 177 |
+
This endpoint retrieves the current status of a video processing job.
|
| 178 |
+
It is secured with API key authentication.
|
| 179 |
+
|
| 180 |
+
Args:
|
| 181 |
+
video_id (str): Unique identifier for the video processing job
|
| 182 |
+
db (Session): Database session dependency
|
| 183 |
+
api_key_valid (bool): API key validation dependency
|
| 184 |
+
|
| 185 |
+
Returns:
|
| 186 |
+
ProcessingStatus: Object containing the video ID and current processing status
|
| 187 |
+
|
| 188 |
+
Example:
|
| 189 |
+
GET /processing/direct/status/vid-12345
|
| 190 |
+
X-API-Key: your-api-key
|
| 191 |
+
"""
|
| 192 |
+
logger.info(f"[get_processing_status_direct] Checking status for video ID: {video_id}")
|
| 193 |
+
processing_service = ProcessingService(db)
|
| 194 |
+
try:
|
| 195 |
+
status = processing_service.get_processing_status(video_id)
|
| 196 |
+
logger.info(f"[get_processing_status_direct] Status for video ID {video_id}: {status.status}, Progress: {status.progress}")
|
| 197 |
+
return status
|
| 198 |
+
except Exception as e:
|
| 199 |
+
logger.error(f"[get_processing_status_direct] Error getting status: {str(e)}")
|
| 200 |
+
raise
|
| 201 |
+
|
| 202 |
+
@router.get("/direct/results/{video_id}")
|
| 203 |
+
async def get_processing_results_direct(
|
| 204 |
+
video_id: str,
|
| 205 |
+
db: Session = Depends(get_db),
|
| 206 |
+
api_key_valid: bool = Depends(get_api_key_user)
|
| 207 |
+
):
|
| 208 |
+
"""
|
| 209 |
+
Get the results of a completed video processing job using API key authentication.
|
| 210 |
+
|
| 211 |
+
This endpoint retrieves the full results of a video processing job
|
| 212 |
+
after it has completed. It is secured with API key authentication.
|
| 213 |
+
|
| 214 |
+
Args:
|
| 215 |
+
video_id (str): Unique identifier for the video processing job
|
| 216 |
+
db (Session): Database session dependency
|
| 217 |
+
api_key_valid (bool): API key validation dependency
|
| 218 |
+
|
| 219 |
+
Returns:
|
| 220 |
+
dict: Processing results including behavior analytics data
|
| 221 |
+
|
| 222 |
+
Example:
|
| 223 |
+
GET /processing/direct/results/vid-12345
|
| 224 |
+
X-API-Key: your-api-key
|
| 225 |
+
"""
|
| 226 |
+
logger.info(f"[get_processing_results_direct] Retrieving results for video ID: {video_id}")
|
| 227 |
+
processing_service = ProcessingService(db)
|
| 228 |
+
results = processing_service.get_processing_results(video_id)
|
| 229 |
+
log_success(logger, f"[get_processing_results_direct] Successfully retrieved results for video ID: {video_id}")
|
| 230 |
+
return results
|
behavior_backend/app/api/routes/users.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
import logging
|
| 3 |
+
from fastapi import APIRouter, Depends, HTTPException, status
|
| 4 |
+
from sqlalchemy.orm import Session
|
| 5 |
+
from typing import List
|
| 6 |
+
|
| 7 |
+
from app.db.base import get_db
|
| 8 |
+
from app.db.models import User
|
| 9 |
+
from app.models.user import UserCreate, User as UserSchema, UserOut
|
| 10 |
+
from app.utils.security import get_password_hash
|
| 11 |
+
from app.utils.auth import get_current_active_user
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
router = APIRouter(
|
| 16 |
+
prefix="/users",
|
| 17 |
+
tags=["users"],
|
| 18 |
+
responses={404: {"description": "Not found"}},
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
@router.post("/", response_model=UserSchema, status_code=status.HTTP_201_CREATED)
|
| 22 |
+
def create_user(user: UserCreate, db: Session = Depends(get_db)):
|
| 23 |
+
"""
|
| 24 |
+
Create a new user.
|
| 25 |
+
"""
|
| 26 |
+
# Check if user with this email already exists
|
| 27 |
+
db_user = db.query(User).filter(User.email == user.email).first()
|
| 28 |
+
if db_user:
|
| 29 |
+
raise HTTPException(
|
| 30 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
| 31 |
+
detail="Email already registered"
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
# Create new user
|
| 35 |
+
new_user = User(
|
| 36 |
+
id=str(uuid.uuid4()),
|
| 37 |
+
email=user.email,
|
| 38 |
+
first_name=user.first_name,
|
| 39 |
+
last_name=user.last_name,
|
| 40 |
+
hashed_password=get_password_hash(user.password)
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
# Add to database
|
| 44 |
+
db.add(new_user)
|
| 45 |
+
db.commit()
|
| 46 |
+
db.refresh(new_user)
|
| 47 |
+
|
| 48 |
+
return new_user
|
| 49 |
+
|
| 50 |
+
@router.get("/me", response_model=UserOut)
|
| 51 |
+
async def read_users_me(current_user: User = Depends(get_current_active_user)):
|
| 52 |
+
"""
|
| 53 |
+
Get current user information.
|
| 54 |
+
"""
|
| 55 |
+
return current_user
|
| 56 |
+
|
| 57 |
+
@router.get("/{user_id}", response_model=UserOut)
|
| 58 |
+
def get_user_by_id(
|
| 59 |
+
user_id: str,
|
| 60 |
+
db: Session = Depends(get_db),
|
| 61 |
+
current_user: User = Depends(get_current_active_user)
|
| 62 |
+
) -> UserOut:
|
| 63 |
+
"""Get user by Id"""
|
| 64 |
+
logger.info(f"Getting user with id: {user_id}")
|
| 65 |
+
db_user = db.query(User).filter(User.id == user_id).first()
|
| 66 |
+
if not db_user:
|
| 67 |
+
logger.warning(f"User with id {user_id} not found")
|
| 68 |
+
raise HTTPException(
|
| 69 |
+
status_code=status.HTTP_404_NOT_FOUND,
|
| 70 |
+
detail="User not found"
|
| 71 |
+
)
|
| 72 |
+
logger.info(f"Successfully retrieved user with id: {user_id}")
|
| 73 |
+
return db_user
|
behavior_backend/app/api/routes/videos.py
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, File, UploadFile, HTTPException, BackgroundTasks
|
| 2 |
+
from sqlalchemy.orm import Session
|
| 3 |
+
from typing import List, Dict, Any
|
| 4 |
+
import time
|
| 5 |
+
import asyncio
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
from app.db.base import get_db
|
| 9 |
+
from app.models.video import VideoMetadata
|
| 10 |
+
from app.models.processing import ProcessingRequest
|
| 11 |
+
from app.services.video_service import VideoService
|
| 12 |
+
from app.services.processing.processing_service import ProcessingService
|
| 13 |
+
from app.utils.logging_utils import setup_logger, log_success
|
| 14 |
+
from app.utils.auth import get_current_active_user, get_api_key_user
|
| 15 |
+
from app.db.models import User
|
| 16 |
+
from app.core.exceptions import VideoNotFoundError
|
| 17 |
+
from app.services.processing.video_processor import process_video
|
| 18 |
+
|
| 19 |
+
"""
|
| 20 |
+
Video Management API Routes
|
| 21 |
+
==========================
|
| 22 |
+
|
| 23 |
+
This module provides API endpoints for video management operations, including:
|
| 24 |
+
- Uploading video files
|
| 25 |
+
- Listing available videos
|
| 26 |
+
- Retrieving video metadata
|
| 27 |
+
|
| 28 |
+
All endpoints are prefixed with '/videos' and include appropriate logging
|
| 29 |
+
with endpoint identification in square brackets.
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
# Setup logger for this module
|
| 33 |
+
logger = setup_logger("videos_router")
|
| 34 |
+
|
| 35 |
+
router = APIRouter(
|
| 36 |
+
prefix="/videos",
|
| 37 |
+
tags=["videos"],
|
| 38 |
+
responses={404: {"description": "Not found"}},
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
@router.post("/upload", response_model=VideoMetadata)
|
| 42 |
+
async def upload_video(
|
| 43 |
+
file: UploadFile = File(...),
|
| 44 |
+
db: Session = Depends(get_db),
|
| 45 |
+
current_user: User = Depends(get_current_active_user)
|
| 46 |
+
):
|
| 47 |
+
"""
|
| 48 |
+
Upload a video file to the system.
|
| 49 |
+
|
| 50 |
+
This endpoint accepts a video file upload, stores it in the system,
|
| 51 |
+
and returns metadata about the stored video including a unique ID.
|
| 52 |
+
|
| 53 |
+
Args:
|
| 54 |
+
file (UploadFile): The video file to upload
|
| 55 |
+
db (Session): Database session dependency
|
| 56 |
+
current_user (User): Current active user dependency
|
| 57 |
+
|
| 58 |
+
Returns:
|
| 59 |
+
VideoMetadata: Metadata about the uploaded video
|
| 60 |
+
|
| 61 |
+
Example:
|
| 62 |
+
POST /videos/upload
|
| 63 |
+
Content-Type: multipart/form-data
|
| 64 |
+
|
| 65 |
+
file: [binary video data]
|
| 66 |
+
"""
|
| 67 |
+
logger.info(f"[upload_video] Received upload request for file: {file.filename}")
|
| 68 |
+
video_service = VideoService(db)
|
| 69 |
+
result = await video_service.upload_video(file)
|
| 70 |
+
log_success(logger, f"[upload_video] Successfully uploaded video with ID: {result.video_id}")
|
| 71 |
+
return result
|
| 72 |
+
|
| 73 |
+
@router.post("/upload-direct", response_model=VideoMetadata)
|
| 74 |
+
async def upload_video_direct(
|
| 75 |
+
file: UploadFile = File(...),
|
| 76 |
+
db: Session = Depends(get_db),
|
| 77 |
+
api_key_valid: bool = Depends(get_api_key_user)
|
| 78 |
+
):
|
| 79 |
+
"""
|
| 80 |
+
Upload a video file to the system using API key authentication.
|
| 81 |
+
|
| 82 |
+
This endpoint accepts a video file upload, stores it in the system,
|
| 83 |
+
and returns metadata about the stored video including a unique ID.
|
| 84 |
+
This endpoint is secured with API key authentication.
|
| 85 |
+
|
| 86 |
+
Args:
|
| 87 |
+
file (UploadFile): The video file to upload
|
| 88 |
+
db (Session): Database session dependency
|
| 89 |
+
api_key_valid (bool): API key validation dependency
|
| 90 |
+
|
| 91 |
+
Returns:
|
| 92 |
+
VideoMetadata: Metadata about the uploaded video
|
| 93 |
+
|
| 94 |
+
Example:
|
| 95 |
+
POST /videos/upload-direct
|
| 96 |
+
Content-Type: multipart/form-data
|
| 97 |
+
X-API-Key: your-api-key
|
| 98 |
+
|
| 99 |
+
file: [binary video data]
|
| 100 |
+
"""
|
| 101 |
+
logger.info(f"[upload_video_direct] Received upload request for file: {file.filename}")
|
| 102 |
+
video_service = VideoService(db)
|
| 103 |
+
result = await video_service.upload_video(file)
|
| 104 |
+
log_success(logger, f"[upload_video_direct] Successfully uploaded video with ID: {result.video_id}")
|
| 105 |
+
return result
|
| 106 |
+
|
| 107 |
+
@router.post("/upload-and-process", response_model=Dict[str, Any])
|
| 108 |
+
async def upload_and_process_video(
|
| 109 |
+
file: UploadFile = File(...),
|
| 110 |
+
frame_rate: int = 5,
|
| 111 |
+
language: str = "en",
|
| 112 |
+
backend: str = "mediapipe",
|
| 113 |
+
generate_annotated_video: bool = False,
|
| 114 |
+
model_name: str = "gpt-4o",
|
| 115 |
+
db: Session = Depends(get_db),
|
| 116 |
+
current_user: User = Depends(get_current_active_user),
|
| 117 |
+
):
|
| 118 |
+
"""
|
| 119 |
+
Upload a video file, process it, and wait for results.
|
| 120 |
+
|
| 121 |
+
This endpoint combines uploading, processing, and getting results in one call.
|
| 122 |
+
It monitors the processing status and returns the results when complete.
|
| 123 |
+
|
| 124 |
+
Args:
|
| 125 |
+
file (UploadFile): The video file to upload
|
| 126 |
+
frame_rate (int): Frame rate for processing (1-90)
|
| 127 |
+
language (str): Language of the video
|
| 128 |
+
backend (str): Backend for face detection
|
| 129 |
+
generate_annotated_video (bool): Whether to generate annotated video
|
| 130 |
+
model_name (str): AI model to use for analysis
|
| 131 |
+
db (Session): Database session dependency
|
| 132 |
+
current_user (User): Current active user dependency
|
| 133 |
+
|
| 134 |
+
Returns:
|
| 135 |
+
Dict[str, Any]: Processing results with timing information
|
| 136 |
+
|
| 137 |
+
Example:
|
| 138 |
+
POST /videos/upload-and-process
|
| 139 |
+
Content-Type: multipart/form-data
|
| 140 |
+
|
| 141 |
+
file: [binary video data]
|
| 142 |
+
frame_rate: 5
|
| 143 |
+
language: en
|
| 144 |
+
backend: mediapipe
|
| 145 |
+
generate_annotated_video: false
|
| 146 |
+
model_name: gpt-4o
|
| 147 |
+
"""
|
| 148 |
+
start_time = time.time()
|
| 149 |
+
|
| 150 |
+
# Upload the video
|
| 151 |
+
logger.info(f"[upload_and_process] Received request to upload and process file: {file.filename}")
|
| 152 |
+
video_service = VideoService(db)
|
| 153 |
+
upload_result = await video_service.upload_video(file)
|
| 154 |
+
video_id = upload_result.video_id
|
| 155 |
+
upload_time = time.time() - start_time
|
| 156 |
+
logger.info(f"[upload_and_process] Video uploaded in {upload_time:.2f} seconds with ID: {video_id}")
|
| 157 |
+
|
| 158 |
+
# Start processing
|
| 159 |
+
processing_start_time = time.time()
|
| 160 |
+
processing_service = ProcessingService(db)
|
| 161 |
+
processing_request = ProcessingRequest(
|
| 162 |
+
video_id=video_id,
|
| 163 |
+
frame_rate=frame_rate,
|
| 164 |
+
backend=backend,
|
| 165 |
+
language=language,
|
| 166 |
+
generate_annotated_video=generate_annotated_video,
|
| 167 |
+
model_name=model_name
|
| 168 |
+
)
|
| 169 |
+
await processing_service.process_video(processing_request, background_tasks)
|
| 170 |
+
|
| 171 |
+
# Wait for processing to complete (poll status)
|
| 172 |
+
while True:
|
| 173 |
+
status = processing_service.get_processing_status(video_id)
|
| 174 |
+
if status.status == "completed":
|
| 175 |
+
break
|
| 176 |
+
elif status.status == "failed":
|
| 177 |
+
raise HTTPException(status_code=500, detail="Video processing failed")
|
| 178 |
+
|
| 179 |
+
# Wait a bit before checking again
|
| 180 |
+
await asyncio.sleep(2)
|
| 181 |
+
|
| 182 |
+
processing_time = time.time() - processing_start_time
|
| 183 |
+
logger.info(f"[upload_and_process] Video processed in {processing_time:.2f} seconds")
|
| 184 |
+
|
| 185 |
+
# Get processing results
|
| 186 |
+
results = processing_service.get_processing_results(video_id)
|
| 187 |
+
|
| 188 |
+
# Add timing information
|
| 189 |
+
total_time = time.time() - start_time
|
| 190 |
+
results["timing"] = {
|
| 191 |
+
"upload_time_seconds": upload_time,
|
| 192 |
+
"processing_time_seconds": processing_time,
|
| 193 |
+
"total_time_seconds": total_time
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
log_success(logger, f"[upload_and_process] Successfully processed video in {total_time:.2f} seconds")
|
| 197 |
+
return results
|
| 198 |
+
|
| 199 |
+
@router.post("/upload-and-process-direct", response_model=Dict[str, Any])
|
| 200 |
+
async def upload_and_process_video_direct(
|
| 201 |
+
file: UploadFile = File(...),
|
| 202 |
+
frame_rate: int = 35,
|
| 203 |
+
language: str = "en",
|
| 204 |
+
backend: str = "mediapipe",
|
| 205 |
+
generate_annotated_video: bool = False,
|
| 206 |
+
model_name: str = "gpt-4o",
|
| 207 |
+
db: Session = Depends(get_db),
|
| 208 |
+
api_key_valid: bool = Depends(get_api_key_user),
|
| 209 |
+
):
|
| 210 |
+
"""
|
| 211 |
+
Upload a video file and process it directly without background tasks.
|
| 212 |
+
|
| 213 |
+
This endpoint is secured with API key authentication.
|
| 214 |
+
|
| 215 |
+
Args:
|
| 216 |
+
file (UploadFile): The video file to upload
|
| 217 |
+
frame_rate (int): Frame rate for processing (1-90)
|
| 218 |
+
language (str): Language of the video
|
| 219 |
+
backend (str): Backend for face detection
|
| 220 |
+
generate_annotated_video (bool): Whether to generate annotated video
|
| 221 |
+
model_name (str): AI model to use for analysis
|
| 222 |
+
db (Session): Database session dependency
|
| 223 |
+
api_key_valid (bool): API key validation dependency
|
| 224 |
+
|
| 225 |
+
Returns:
|
| 226 |
+
Dict[str, Any]: Processing results with timing information
|
| 227 |
+
|
| 228 |
+
Example:
|
| 229 |
+
POST /videos/upload-and-process-direct
|
| 230 |
+
Content-Type: multipart/form-data
|
| 231 |
+
X-API-Key: your-api-key
|
| 232 |
+
|
| 233 |
+
file: [binary video data]
|
| 234 |
+
frame_rate: 5
|
| 235 |
+
language: en
|
| 236 |
+
backend: mediapipe
|
| 237 |
+
generate_annotated_video: false
|
| 238 |
+
model_name: gpt-4o
|
| 239 |
+
"""
|
| 240 |
+
start_time = time.time()
|
| 241 |
+
|
| 242 |
+
# Upload the video
|
| 243 |
+
logger.info(f"[upload_and_process_direct] Received request to upload and process file: {file.filename}")
|
| 244 |
+
video_service = VideoService(db)
|
| 245 |
+
upload_result = await video_service.upload_video(file)
|
| 246 |
+
video_id = upload_result.video_id
|
| 247 |
+
video_path = upload_result.video_url.replace("/uploads/", "static/uploads/")
|
| 248 |
+
upload_time = time.time() - start_time
|
| 249 |
+
logger.info(f"[upload_and_process_direct] Video uploaded in {upload_time:.2f} seconds with ID: {video_id}")
|
| 250 |
+
|
| 251 |
+
# Start processing directly
|
| 252 |
+
processing_start_time = time.time()
|
| 253 |
+
processing_service = ProcessingService(db)
|
| 254 |
+
|
| 255 |
+
# Update status
|
| 256 |
+
video_repo = processing_service.video_repo
|
| 257 |
+
video_repo.update_status(video_id, "processing")
|
| 258 |
+
|
| 259 |
+
logger.info(f"[upload_and_process_direct] Starting direct processing of video: {video_id}")
|
| 260 |
+
logger.info(f"[upload_and_process_direct] Video path: {video_path}")
|
| 261 |
+
|
| 262 |
+
try:
|
| 263 |
+
# Process the video directly
|
| 264 |
+
transcript, analysis = process_video(
|
| 265 |
+
video_path=video_path,
|
| 266 |
+
frame_rate=frame_rate,
|
| 267 |
+
backend=backend,
|
| 268 |
+
language=language,
|
| 269 |
+
generate_annotated_video=generate_annotated_video,
|
| 270 |
+
video_id=video_id,
|
| 271 |
+
status_callback=lambda progress: processing_service._update_progress(video_id, progress),
|
| 272 |
+
model_name=model_name
|
| 273 |
+
)
|
| 274 |
+
|
| 275 |
+
# Save results to database
|
| 276 |
+
results_repo = processing_service.results_repo
|
| 277 |
+
|
| 278 |
+
# Parse the analysis JSON
|
| 279 |
+
analysis_data = {}
|
| 280 |
+
try:
|
| 281 |
+
# Remove any surrounding code blocks that might be present
|
| 282 |
+
if analysis and isinstance(analysis, str):
|
| 283 |
+
# If it contains JSON block markers, extract just the JSON content
|
| 284 |
+
if "```json" in analysis:
|
| 285 |
+
analysis = analysis.split("```json", 1)[1].split("```", 1)[0]
|
| 286 |
+
elif "```" in analysis:
|
| 287 |
+
analysis = analysis.split("```", 1)[1].split("```", 1)[0]
|
| 288 |
+
|
| 289 |
+
# Parse the JSON
|
| 290 |
+
analysis_data = json.loads(analysis)
|
| 291 |
+
logger.info(f"[upload_and_process_direct] Successfully parsed analysis data")
|
| 292 |
+
elif analysis and isinstance(analysis, dict):
|
| 293 |
+
analysis_data = analysis
|
| 294 |
+
except Exception as e:
|
| 295 |
+
logger.error(f"[upload_and_process_direct] Error parsing analysis JSON: {str(e)}")
|
| 296 |
+
logger.error(f"[upload_and_process_direct] Raw analysis data: {analysis[:500]}...")
|
| 297 |
+
analysis_data = {"error": "Failed to parse analysis data"}
|
| 298 |
+
|
| 299 |
+
# Extract data from the comprehensive analysis
|
| 300 |
+
emotion_analysis = analysis_data.get("Emotion Analysis", {})
|
| 301 |
+
overall_summary = analysis_data.get("Overall Summary", "")
|
| 302 |
+
transcript_analysis = analysis_data.get("Transcript Analysis", {})
|
| 303 |
+
recommendations = analysis_data.get("Recommendations", {})
|
| 304 |
+
body_language_analysis = analysis_data.get("Body Language Analysis", {})
|
| 305 |
+
eye_contact_analysis = analysis_data.get("Eye Contact Analysis", {})
|
| 306 |
+
|
| 307 |
+
# Try both capitalized and non-capitalized versions
|
| 308 |
+
eye_contact_data = analysis_data.get("eye_contact_analysis", {})
|
| 309 |
+
body_language_data = analysis_data.get("body_language_analysis", {})
|
| 310 |
+
face_analysis_data = analysis_data.get("face_analysis", {})
|
| 311 |
+
|
| 312 |
+
if "eye_contact_analysis" not in analysis_data and "Eye Contact Analysis" in analysis_data:
|
| 313 |
+
eye_contact_data = analysis_data.get("Eye Contact Analysis", {})
|
| 314 |
+
|
| 315 |
+
if "body_language_analysis" not in analysis_data and "Body Language Analysis" in analysis_data:
|
| 316 |
+
body_language_data = analysis_data.get("Body Language Analysis", {})
|
| 317 |
+
|
| 318 |
+
if "face_analysis" not in analysis_data and "Face Analysis" in analysis_data:
|
| 319 |
+
face_analysis_data = analysis_data.get("Face Analysis", {})
|
| 320 |
+
|
| 321 |
+
# Create results record
|
| 322 |
+
results_repo.create(
|
| 323 |
+
video_id=video_id,
|
| 324 |
+
transcript=transcript or "",
|
| 325 |
+
emotion_analysis=emotion_analysis,
|
| 326 |
+
overall_summary=overall_summary or "Video processed successfully",
|
| 327 |
+
transcript_analysis=transcript_analysis,
|
| 328 |
+
recommendations=recommendations,
|
| 329 |
+
body_language_analysis=body_language_analysis,
|
| 330 |
+
body_language_data=body_language_data,
|
| 331 |
+
eye_contact_analysis=eye_contact_analysis,
|
| 332 |
+
eye_contact_data=eye_contact_data,
|
| 333 |
+
face_analysis_data=face_analysis_data
|
| 334 |
+
)
|
| 335 |
+
|
| 336 |
+
# Update video status
|
| 337 |
+
video_repo.update_status(video_id, "completed")
|
| 338 |
+
|
| 339 |
+
processing_time = time.time() - processing_start_time
|
| 340 |
+
logger.info(f"[upload_and_process_direct] Video processed in {processing_time:.2f} seconds")
|
| 341 |
+
|
| 342 |
+
# Get processing results
|
| 343 |
+
results = processing_service.get_processing_results(video_id)
|
| 344 |
+
|
| 345 |
+
# Add timing information
|
| 346 |
+
total_time = time.time() - start_time
|
| 347 |
+
results["timing"] = {
|
| 348 |
+
"upload_time_seconds": upload_time,
|
| 349 |
+
"processing_time_seconds": processing_time,
|
| 350 |
+
"total_time_seconds": total_time
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
log_success(logger, f"[upload_and_process_direct] Successfully processed video in {total_time:.2f} seconds")
|
| 354 |
+
return results
|
| 355 |
+
|
| 356 |
+
except Exception as e:
|
| 357 |
+
logger.error(f"[upload_and_process_direct] Error processing video: {str(e)}")
|
| 358 |
+
video_repo.update_status(video_id, "failed")
|
| 359 |
+
raise HTTPException(status_code=500, detail=f"Error processing video: {str(e)}")
|
| 360 |
+
|
| 361 |
+
@router.get("", response_model=List[VideoMetadata])
|
| 362 |
+
async def list_videos(
|
| 363 |
+
db: Session = Depends(get_db),
|
| 364 |
+
current_user: User = Depends(get_current_active_user)
|
| 365 |
+
):
|
| 366 |
+
"""
|
| 367 |
+
List all videos available in the system.
|
| 368 |
+
|
| 369 |
+
This endpoint retrieves metadata for all videos that have been
|
| 370 |
+
uploaded to the system.
|
| 371 |
+
|
| 372 |
+
Args:
|
| 373 |
+
db (Session): Database session dependency
|
| 374 |
+
current_user (User): Current active user dependency
|
| 375 |
+
|
| 376 |
+
Returns:
|
| 377 |
+
List[VideoMetadata]: List of metadata objects for all available videos
|
| 378 |
+
|
| 379 |
+
Example:
|
| 380 |
+
GET /videos
|
| 381 |
+
"""
|
| 382 |
+
logger.info("[list_videos] Retrieving list of all videos")
|
| 383 |
+
video_service = VideoService(db)
|
| 384 |
+
videos = video_service.list_videos()
|
| 385 |
+
logger.info(f"[list_videos] Found {len(videos)} videos")
|
| 386 |
+
return videos
|
| 387 |
+
|
| 388 |
+
@router.get("/{video_id}", response_model=VideoMetadata)
|
| 389 |
+
async def get_video_metadata(
|
| 390 |
+
video_id: str,
|
| 391 |
+
db: Session = Depends(get_db),
|
| 392 |
+
current_user: User = Depends(get_current_active_user)
|
| 393 |
+
):
|
| 394 |
+
"""
|
| 395 |
+
Get metadata for a specific video.
|
| 396 |
+
|
| 397 |
+
This endpoint retrieves detailed metadata for a specific video
|
| 398 |
+
identified by its unique ID.
|
| 399 |
+
|
| 400 |
+
Args:
|
| 401 |
+
video_id (str): Unique identifier for the video
|
| 402 |
+
db (Session): Database session dependency
|
| 403 |
+
current_user (User): Current active user dependency
|
| 404 |
+
|
| 405 |
+
Returns:
|
| 406 |
+
VideoMetadata: Metadata object for the requested video
|
| 407 |
+
|
| 408 |
+
Example:
|
| 409 |
+
GET /videos/vid-12345
|
| 410 |
+
"""
|
| 411 |
+
logger.info(f"[get_video_metadata] Retrieving metadata for video ID: {video_id}")
|
| 412 |
+
video_service = VideoService(db)
|
| 413 |
+
metadata = video_service.get_video_metadata(video_id)
|
| 414 |
+
logger.info(f"[get_video_metadata] Retrieved metadata for video: {metadata.original_filename}")
|
| 415 |
+
return metadata
|
| 416 |
+
|
| 417 |
+
@router.delete("/{video_id}", status_code=204)
|
| 418 |
+
async def delete_video(
|
| 419 |
+
video_id: str,
|
| 420 |
+
db: Session = Depends(get_db),
|
| 421 |
+
current_user: User = Depends(get_current_active_user)
|
| 422 |
+
):
|
| 423 |
+
"""
|
| 424 |
+
Delete a specific video.
|
| 425 |
+
|
| 426 |
+
This endpoint deletes a video and its associated file from the system.
|
| 427 |
+
|
| 428 |
+
Args:
|
| 429 |
+
video_id (str): Unique identifier for the video to delete
|
| 430 |
+
db (Session): Database session dependency
|
| 431 |
+
current_user (User): Current active user dependency
|
| 432 |
+
|
| 433 |
+
Returns:
|
| 434 |
+
204 No Content on success
|
| 435 |
+
|
| 436 |
+
Example:
|
| 437 |
+
DELETE /videos/vid-12345
|
| 438 |
+
"""
|
| 439 |
+
logger.info(f"[delete_video] Attempting to delete video ID: {video_id}")
|
| 440 |
+
video_service = VideoService(db)
|
| 441 |
+
|
| 442 |
+
try:
|
| 443 |
+
result = video_service.delete_video(video_id)
|
| 444 |
+
if result:
|
| 445 |
+
log_success(logger, f"[delete_video] Successfully deleted video ID: {video_id}")
|
| 446 |
+
return None
|
| 447 |
+
else:
|
| 448 |
+
raise HTTPException(status_code=404, detail=f"Video with ID {video_id} not found")
|
| 449 |
+
except VideoNotFoundError:
|
| 450 |
+
logger.warning(f"[delete_video] Video not found with ID: {video_id}")
|
| 451 |
+
raise HTTPException(status_code=404, detail=f"Video with ID {video_id} not found")
|
| 452 |
+
except Exception as e:
|
| 453 |
+
logger.error(f"[delete_video] Error deleting video ID {video_id}: {str(e)}")
|
| 454 |
+
raise HTTPException(status_code=500, detail=f"Error deleting video: {str(e)}")
|
behavior_backend/app/core/README_AUTH.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Authentication System Documentation
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
This document describes the authentication system implemented for the EmotiVid API. The system uses OAuth2 with JWT (JSON Web Tokens) for secure authentication and authorization.
|
| 6 |
+
|
| 7 |
+
## Authentication Flow
|
| 8 |
+
|
| 9 |
+
1. **User Registration**: Users register with their email, password, and other required information.
|
| 10 |
+
2. **User Login**: Users login with their credentials and receive a JWT token.
|
| 11 |
+
3. **Protected Endpoints**: All API endpoints require a valid JWT token for access.
|
| 12 |
+
|
| 13 |
+
## Implementation Details
|
| 14 |
+
|
| 15 |
+
### JWT Token
|
| 16 |
+
|
| 17 |
+
- **Token Format**: The JWT token contains the user ID in the `sub` claim.
|
| 18 |
+
- **Token Expiration**: Tokens expire after 30 minutes by default (configurable in settings).
|
| 19 |
+
- **Token Signing**: Tokens are signed using the HS256 algorithm with a secret key.
|
| 20 |
+
|
| 21 |
+
### Authentication Endpoints
|
| 22 |
+
|
| 23 |
+
- **POST /api/auth/login**: OAuth2 compatible login endpoint that accepts form data.
|
| 24 |
+
- **POST /api/auth/login/email**: Alternative login endpoint that accepts JSON with email and password.
|
| 25 |
+
|
| 26 |
+
### User Endpoints
|
| 27 |
+
|
| 28 |
+
- **POST /api/users/**: Create a new user (registration).
|
| 29 |
+
- **GET /api/users/me**: Get current user information.
|
| 30 |
+
- **GET /api/users/{user_id}**: Get user information by ID.
|
| 31 |
+
|
| 32 |
+
## How to Use
|
| 33 |
+
|
| 34 |
+
### Registration
|
| 35 |
+
|
| 36 |
+
```python
|
| 37 |
+
import requests
|
| 38 |
+
|
| 39 |
+
response = requests.post(
|
| 40 |
+
"http://localhost:8000/api/users/",
|
| 41 |
+
json={
|
| 42 |
+
"email": "user@example.com",
|
| 43 |
+
"password": "securepassword",
|
| 44 |
+
"first_name": "John",
|
| 45 |
+
"last_name": "Doe"
|
| 46 |
+
}
|
| 47 |
+
)
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
### Login
|
| 51 |
+
|
| 52 |
+
```python
|
| 53 |
+
import requests
|
| 54 |
+
|
| 55 |
+
response = requests.post(
|
| 56 |
+
"http://localhost:8000/api/auth/login",
|
| 57 |
+
data={
|
| 58 |
+
"username": "user@example.com", # Note: OAuth2 uses 'username' for email
|
| 59 |
+
"password": "securepassword"
|
| 60 |
+
}
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
token = response.json()["access_token"]
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### Accessing Protected Endpoints
|
| 67 |
+
|
| 68 |
+
```python
|
| 69 |
+
import requests
|
| 70 |
+
|
| 71 |
+
headers = {
|
| 72 |
+
"Authorization": f"Bearer {token}"
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
response = requests.get(
|
| 76 |
+
"http://localhost:8000/api/users/me",
|
| 77 |
+
headers=headers
|
| 78 |
+
)
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
## Security Considerations
|
| 82 |
+
|
| 83 |
+
- The secret key should be kept secure and not committed to version control.
|
| 84 |
+
- In production, use HTTPS to prevent token interception.
|
| 85 |
+
- Consider implementing token refresh functionality for long-lived sessions.
|
| 86 |
+
- Implement rate limiting to prevent brute force attacks.
|
| 87 |
+
|
| 88 |
+
## Dependencies
|
| 89 |
+
|
| 90 |
+
- `python-jose`: For JWT token handling.
|
| 91 |
+
- `passlib`: For password hashing.
|
| 92 |
+
- `bcrypt`: For secure password hashing algorithm.
|
| 93 |
+
- `fastapi`: For OAuth2 password flow implementation.
|
behavior_backend/app/core/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/core/config.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from pydantic_settings import BaseSettings
|
| 5 |
+
import secrets
|
| 6 |
+
|
| 7 |
+
# Load environment variables from .env file
|
| 8 |
+
load_dotenv(override=True)
|
| 9 |
+
|
| 10 |
+
# Base directory
|
| 11 |
+
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Settings(BaseSettings):
|
| 15 |
+
"""Application settings."""
|
| 16 |
+
|
| 17 |
+
# API settings
|
| 18 |
+
API_V1_STR: str = "/api"
|
| 19 |
+
PROJECT_NAME: str = "EmotiVid API"
|
| 20 |
+
PROJECT_DESCRIPTION: str = "API for video emotion analysis"
|
| 21 |
+
VERSION: str = "1.0.0"
|
| 22 |
+
|
| 23 |
+
# Database settings
|
| 24 |
+
DATABASE_URL: str = os.getenv("DATABASE_URL", f"sqlite:///{BASE_DIR}/app.db")
|
| 25 |
+
|
| 26 |
+
# File storage settings
|
| 27 |
+
UPLOAD_DIR: Path = BASE_DIR / "static" / "uploads"
|
| 28 |
+
RESULTS_DIR: Path = BASE_DIR / "static" / "results"
|
| 29 |
+
|
| 30 |
+
# OpenAI settings
|
| 31 |
+
OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
|
| 32 |
+
|
| 33 |
+
# Groq settings
|
| 34 |
+
GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 35 |
+
|
| 36 |
+
# CORS settings
|
| 37 |
+
CORS_ORIGINS: list = ["*"] # In production, replace with specific frontend URL
|
| 38 |
+
|
| 39 |
+
# JWT settings
|
| 40 |
+
SECRET_KEY: str = os.getenv("SECRET_KEY", secrets.token_urlsafe(32))
|
| 41 |
+
ALGORITHM: str = "HS256"
|
| 42 |
+
ACCESS_TOKEN_EXPIRE_MINUTES: int = 1440 * 3 # 72 hours (3 days) instead of 24 hours
|
| 43 |
+
|
| 44 |
+
# API Key settings
|
| 45 |
+
API_KEY: str = os.getenv("API_KEY", secrets.token_urlsafe(32))
|
| 46 |
+
|
| 47 |
+
class Config:
|
| 48 |
+
env_file = ".env"
|
| 49 |
+
case_sensitive = True
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Create settings instance
|
| 53 |
+
settings = Settings()
|
| 54 |
+
|
| 55 |
+
# Ensure directories exist
|
| 56 |
+
settings.UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
| 57 |
+
settings.RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
behavior_backend/app/core/exceptions.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import HTTPException, status
|
| 2 |
+
|
| 3 |
+
class VideoNotFoundError(HTTPException):
|
| 4 |
+
"""Exception raised when a video is not found."""
|
| 5 |
+
|
| 6 |
+
def __init__(self, video_id: str):
|
| 7 |
+
super().__init__(
|
| 8 |
+
status_code=status.HTTP_404_NOT_FOUND,
|
| 9 |
+
detail=f"Video with ID {video_id} not found"
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
class ResultNotFoundError(HTTPException):
|
| 13 |
+
"""Exception raised when a processing result is not found."""
|
| 14 |
+
|
| 15 |
+
def __init__(self, video_id: str):
|
| 16 |
+
super().__init__(
|
| 17 |
+
status_code=status.HTTP_404_NOT_FOUND,
|
| 18 |
+
detail=f"Processing result for video with ID {video_id} not found"
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
class VideoUploadError(HTTPException):
|
| 22 |
+
"""Exception raised when there is an error uploading a video."""
|
| 23 |
+
|
| 24 |
+
def __init__(self, detail: str = "Error uploading video"):
|
| 25 |
+
super().__init__(
|
| 26 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
| 27 |
+
detail=detail
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
class VideoProcessingError(HTTPException):
|
| 31 |
+
"""Exception raised when there is an error processing a video."""
|
| 32 |
+
|
| 33 |
+
def __init__(self, detail: str = "Error processing video"):
|
| 34 |
+
super().__init__(
|
| 35 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 36 |
+
detail=detail
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
class InvalidParameterError(HTTPException):
|
| 40 |
+
"""Exception raised when a parameter is invalid."""
|
| 41 |
+
|
| 42 |
+
def __init__(self, parameter: str, detail: str = None):
|
| 43 |
+
super().__init__(
|
| 44 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
| 45 |
+
detail=detail or f"Invalid parameter: {parameter}"
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
class DatabaseError(HTTPException):
|
| 49 |
+
"""Exception raised when there is a database error."""
|
| 50 |
+
|
| 51 |
+
def __init__(self, detail: str = "Database error"):
|
| 52 |
+
super().__init__(
|
| 53 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 54 |
+
detail=detail
|
| 55 |
+
)
|
behavior_backend/app/db/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/db/base.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import create_engine
|
| 2 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 3 |
+
from sqlalchemy.orm import sessionmaker
|
| 4 |
+
|
| 5 |
+
from app.core.config import settings
|
| 6 |
+
|
| 7 |
+
# Create SQLAlchemy engine
|
| 8 |
+
engine = create_engine(settings.DATABASE_URL)
|
| 9 |
+
|
| 10 |
+
# Create session factory
|
| 11 |
+
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 12 |
+
|
| 13 |
+
# Create base class for models
|
| 14 |
+
Base = declarative_base()
|
| 15 |
+
|
| 16 |
+
# Function to get database session
|
| 17 |
+
def get_db():
|
| 18 |
+
"""Dependency for getting DB session."""
|
| 19 |
+
db = SessionLocal()
|
| 20 |
+
try:
|
| 21 |
+
yield db
|
| 22 |
+
finally:
|
| 23 |
+
db.close()
|
| 24 |
+
|
| 25 |
+
# Function to create all tables
|
| 26 |
+
def create_tables():
|
| 27 |
+
"""Create all tables in the database."""
|
| 28 |
+
Base.metadata.create_all(bind=engine)
|
behavior_backend/app/db/models.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, String, Integer, Float, DateTime, Boolean, Text, LargeBinary, JSON, ForeignKey
|
| 2 |
+
from datetime import datetime, timezone
|
| 3 |
+
|
| 4 |
+
from app.db.base import Base
|
| 5 |
+
|
| 6 |
+
class Video(Base):
|
| 7 |
+
"""Database model for video metadata."""
|
| 8 |
+
__tablename__ = "videos"
|
| 9 |
+
|
| 10 |
+
id = Column(String, primary_key=True, index=True)
|
| 11 |
+
original_filename = Column(String, nullable=False)
|
| 12 |
+
upload_date = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 13 |
+
file_path = Column(String, nullable=False)
|
| 14 |
+
size = Column(Integer, nullable=False)
|
| 15 |
+
duration = Column(Float, nullable=True)
|
| 16 |
+
status = Column(String, default="uploaded")
|
| 17 |
+
|
| 18 |
+
class ProcessingResult(Base):
|
| 19 |
+
"""Database model for video processing results."""
|
| 20 |
+
__tablename__ = "processing_results"
|
| 21 |
+
|
| 22 |
+
id = Column(String, primary_key=True, index=True)
|
| 23 |
+
video_id = Column(String, ForeignKey('videos.id'), nullable=False)
|
| 24 |
+
processing_date = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 25 |
+
transcript = Column(Text, nullable=True)
|
| 26 |
+
emotion_analysis = Column(JSON, nullable=True)
|
| 27 |
+
overall_summary = Column(Text, nullable=True)
|
| 28 |
+
transcript_analysis = Column(JSON, nullable=True)
|
| 29 |
+
recommendations = Column(JSON, nullable=True)
|
| 30 |
+
body_language_analysis = Column(JSON, nullable=True)
|
| 31 |
+
body_language_data = Column(JSON, nullable=True)
|
| 32 |
+
eye_contact_analysis = Column(JSON, nullable=True)
|
| 33 |
+
eye_contact_data = Column(JSON, nullable=True)
|
| 34 |
+
face_analysis_data = Column(JSON, nullable=True)
|
| 35 |
+
|
| 36 |
+
class User(Base):
|
| 37 |
+
"""Database model for user data."""
|
| 38 |
+
__tablename__ = "users"
|
| 39 |
+
|
| 40 |
+
id = Column(String, primary_key=True, index=True)
|
| 41 |
+
email = Column(String, unique=True, index=True, nullable=False)
|
| 42 |
+
first_name = Column(String, nullable=False)
|
| 43 |
+
last_name = Column(String, nullable=False)
|
| 44 |
+
hashed_password = Column(String, nullable=False)
|
| 45 |
+
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 46 |
+
updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
|
| 47 |
+
is_active = Column(Boolean, default=True)
|
behavior_backend/app/db/repositories/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/db/repositories/results.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy.orm import Session
|
| 2 |
+
from typing import Optional, Dict, Any
|
| 3 |
+
import uuid
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
from app.db.models import ProcessingResult
|
| 7 |
+
|
| 8 |
+
class ResultsRepository:
|
| 9 |
+
"""Repository for processing results database operations."""
|
| 10 |
+
|
| 11 |
+
def __init__(self, db: Session):
|
| 12 |
+
self.db = db
|
| 13 |
+
|
| 14 |
+
def create(
|
| 15 |
+
self,
|
| 16 |
+
video_id: str,
|
| 17 |
+
transcript: str = "",
|
| 18 |
+
emotion_analysis: Dict[str, Any] = None,
|
| 19 |
+
overall_summary: str = "",
|
| 20 |
+
transcript_analysis: Dict[str, Any] = None,
|
| 21 |
+
recommendations: Dict[str, Any] = None,
|
| 22 |
+
body_language_analysis: Dict[str, Any] = None,
|
| 23 |
+
body_language_data: Dict[str, Any] = None,
|
| 24 |
+
eye_contact_analysis: Dict[str, Any] = None,
|
| 25 |
+
eye_contact_data: Dict[str, Any] = None,
|
| 26 |
+
face_analysis_data: Dict[str, Any] = None
|
| 27 |
+
) -> ProcessingResult:
|
| 28 |
+
"""Create a new processing result entry in the database."""
|
| 29 |
+
result_id = str(uuid.uuid4())
|
| 30 |
+
db_result = ProcessingResult(
|
| 31 |
+
id=result_id,
|
| 32 |
+
video_id=video_id,
|
| 33 |
+
processing_date=datetime.now(),
|
| 34 |
+
transcript=transcript,
|
| 35 |
+
emotion_analysis=emotion_analysis or {},
|
| 36 |
+
overall_summary=overall_summary,
|
| 37 |
+
transcript_analysis=transcript_analysis or {},
|
| 38 |
+
recommendations=recommendations or {},
|
| 39 |
+
body_language_analysis=body_language_analysis or {},
|
| 40 |
+
body_language_data=body_language_data or {},
|
| 41 |
+
eye_contact_analysis=eye_contact_analysis or {},
|
| 42 |
+
eye_contact_data=eye_contact_data or {},
|
| 43 |
+
face_analysis_data=face_analysis_data or {}
|
| 44 |
+
)
|
| 45 |
+
self.db.add(db_result)
|
| 46 |
+
self.db.commit()
|
| 47 |
+
self.db.refresh(db_result)
|
| 48 |
+
return db_result
|
| 49 |
+
|
| 50 |
+
def get_by_video_id(self, video_id: str) -> Optional[ProcessingResult]:
|
| 51 |
+
"""Get processing results for a video by its ID."""
|
| 52 |
+
return self.db.query(ProcessingResult).filter(ProcessingResult.video_id == str(video_id)).first()
|
| 53 |
+
|
| 54 |
+
def update(
|
| 55 |
+
self,
|
| 56 |
+
video_id: str,
|
| 57 |
+
transcript: str = None,
|
| 58 |
+
emotion_analysis: Dict[str, Any] = None,
|
| 59 |
+
overall_summary: str = None,
|
| 60 |
+
transcript_analysis: Dict[str, Any] = None,
|
| 61 |
+
recommendations: Dict[str, Any] = None,
|
| 62 |
+
body_language_analysis: Dict[str, Any] = None,
|
| 63 |
+
body_language_data: Dict[str, Any] = None,
|
| 64 |
+
eye_contact_analysis: Dict[str, Any] = None,
|
| 65 |
+
eye_contact_data: Dict[str, Any] = None,
|
| 66 |
+
face_analysis_data: Dict[str, Any] = None
|
| 67 |
+
) -> Optional[ProcessingResult]:
|
| 68 |
+
"""Update processing results for a video."""
|
| 69 |
+
db_result = self.get_by_video_id(video_id)
|
| 70 |
+
if db_result:
|
| 71 |
+
if transcript is not None:
|
| 72 |
+
db_result.transcript = transcript
|
| 73 |
+
if emotion_analysis is not None:
|
| 74 |
+
db_result.emotion_analysis = emotion_analysis
|
| 75 |
+
if overall_summary is not None:
|
| 76 |
+
db_result.overall_summary = overall_summary
|
| 77 |
+
if transcript_analysis is not None:
|
| 78 |
+
db_result.transcript_analysis = transcript_analysis
|
| 79 |
+
if recommendations is not None:
|
| 80 |
+
db_result.recommendations = recommendations
|
| 81 |
+
if body_language_analysis is not None:
|
| 82 |
+
db_result.body_language_analysis = body_language_analysis
|
| 83 |
+
if body_language_data is not None:
|
| 84 |
+
db_result.body_language_data = body_language_data
|
| 85 |
+
if eye_contact_analysis is not None:
|
| 86 |
+
db_result.eye_contact_analysis = eye_contact_analysis
|
| 87 |
+
if eye_contact_data is not None:
|
| 88 |
+
db_result.eye_contact_data = eye_contact_data
|
| 89 |
+
if face_analysis_data is not None:
|
| 90 |
+
db_result.face_analysis_data = face_analysis_data
|
| 91 |
+
|
| 92 |
+
self.db.commit()
|
| 93 |
+
self.db.refresh(db_result)
|
| 94 |
+
return db_result
|
| 95 |
+
|
| 96 |
+
def delete_by_video_id(self, video_id: str) -> bool:
|
| 97 |
+
"""Delete processing results for a video by its ID.
|
| 98 |
+
|
| 99 |
+
Args:
|
| 100 |
+
video_id: ID of the video
|
| 101 |
+
|
| 102 |
+
Returns:
|
| 103 |
+
bool: True if the results were deleted, False if they didn't exist
|
| 104 |
+
"""
|
| 105 |
+
db_result = self.get_by_video_id(video_id)
|
| 106 |
+
if db_result:
|
| 107 |
+
self.db.delete(db_result)
|
| 108 |
+
self.db.commit()
|
| 109 |
+
return True
|
| 110 |
+
return False
|
behavior_backend/app/db/repositories/video.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy.orm import Session
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
import uuid
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
from app.db.models import Video
|
| 7 |
+
|
| 8 |
+
class VideoRepository:
|
| 9 |
+
"""Repository for video database operations."""
|
| 10 |
+
|
| 11 |
+
def __init__(self, db: Session):
|
| 12 |
+
self.db = db
|
| 13 |
+
|
| 14 |
+
def create(self, original_filename: str, file_path: str, size: int, duration: Optional[float] = None) -> Video:
|
| 15 |
+
"""Create a new video entry in the database."""
|
| 16 |
+
video_id = str(uuid.uuid4())
|
| 17 |
+
db_video = Video(
|
| 18 |
+
id=video_id,
|
| 19 |
+
original_filename=original_filename,
|
| 20 |
+
upload_date=datetime.now(),
|
| 21 |
+
file_path=file_path,
|
| 22 |
+
size=size,
|
| 23 |
+
duration=duration,
|
| 24 |
+
status="uploaded"
|
| 25 |
+
)
|
| 26 |
+
# Add the video to the database
|
| 27 |
+
self.db.add(db_video)
|
| 28 |
+
# Commit the transaction
|
| 29 |
+
self.db.commit()
|
| 30 |
+
# Refresh the video object to get the latest state
|
| 31 |
+
self.db.refresh(db_video)
|
| 32 |
+
return db_video
|
| 33 |
+
|
| 34 |
+
def get_by_id(self, video_id: str) -> Optional[Video]:
|
| 35 |
+
"""Get a video by its ID."""
|
| 36 |
+
return self.db.query(Video).filter(Video.id == str(video_id)).first()
|
| 37 |
+
|
| 38 |
+
def get_all(self) -> List[Video]:
|
| 39 |
+
"""Get all videos."""
|
| 40 |
+
return self.db.query(Video).all()
|
| 41 |
+
|
| 42 |
+
def update_status(self, video_id: str, status: str) -> Optional[Video]:
|
| 43 |
+
"""Update the status of a video."""
|
| 44 |
+
db_video = self.get_by_id(video_id)
|
| 45 |
+
if db_video:
|
| 46 |
+
db_video.status = status
|
| 47 |
+
self.db.commit()
|
| 48 |
+
self.db.refresh(db_video)
|
| 49 |
+
return db_video
|
| 50 |
+
|
| 51 |
+
def update_progress(self, video_id: str, progress: float) -> Optional[Video]:
|
| 52 |
+
"""Update the processing progress of a video."""
|
| 53 |
+
db_video = self.get_by_id(video_id)
|
| 54 |
+
if db_video:
|
| 55 |
+
# Ensure progress is between 0 and 100
|
| 56 |
+
progress = max(0, min(100, progress))
|
| 57 |
+
# Format with 2 decimal places
|
| 58 |
+
db_video.status = f"processing ({progress:.2f}%)"
|
| 59 |
+
# Commit immediately to ensure changes are visible to other connections
|
| 60 |
+
self.db.commit()
|
| 61 |
+
self.db.refresh(db_video)
|
| 62 |
+
return db_video
|
| 63 |
+
|
| 64 |
+
def delete(self, video_id: str) -> bool:
|
| 65 |
+
"""Delete a video by its ID.
|
| 66 |
+
|
| 67 |
+
Args:
|
| 68 |
+
video_id: ID of the video to delete
|
| 69 |
+
|
| 70 |
+
Returns:
|
| 71 |
+
bool: True if the video was deleted, False if it didn't exist
|
| 72 |
+
"""
|
| 73 |
+
db_video = self.get_by_id(video_id)
|
| 74 |
+
if db_video:
|
| 75 |
+
self.db.delete(db_video)
|
| 76 |
+
self.db.commit()
|
| 77 |
+
return True
|
| 78 |
+
return False
|
behavior_backend/app/models/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/models/processing.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import Optional, List, Dict, Any
|
| 3 |
+
|
| 4 |
+
class ProcessingRequest(BaseModel):
|
| 5 |
+
"""Model for video processing request."""
|
| 6 |
+
video_id: str
|
| 7 |
+
frame_rate: int = Field(1, ge=1, le=90, description="Skip frames for processing (1-90)")
|
| 8 |
+
backend: str = Field("opencv", description="Backend for face detection")
|
| 9 |
+
language: str = Field("en", description="Language of the video")
|
| 10 |
+
generate_annotated_video: bool = Field(False, description="Generate annotated video")
|
| 11 |
+
model_name: str = Field("gpt-4o", description="AI model to use for analysis")
|
| 12 |
+
|
| 13 |
+
class ProcessingStatus(BaseModel):
|
| 14 |
+
"""Model for video processing status response."""
|
| 15 |
+
video_id: str
|
| 16 |
+
status: str
|
| 17 |
+
progress: Optional[float] = None
|
| 18 |
+
error: Optional[str] = None
|
| 19 |
+
|
| 20 |
+
class EmotionData(BaseModel):
|
| 21 |
+
"""Model for emotion data at a specific frame."""
|
| 22 |
+
frame_index: int
|
| 23 |
+
data: List[Dict[str, Any]]
|
| 24 |
+
|
| 25 |
+
class ProcessingResult(BaseModel):
|
| 26 |
+
"""Model for video processing results response."""
|
| 27 |
+
video_id: str
|
| 28 |
+
emotion_data: Dict[str, List[EmotionData]]
|
| 29 |
+
transcript: str
|
| 30 |
+
analysis: str
|
| 31 |
+
annotated_video_available: bool
|
| 32 |
+
emotion_percentages: Optional[Dict[str, Any]] = None
|
| 33 |
+
overall_sentiment: Optional[str] = None
|
| 34 |
+
frame_emotions_count: Optional[int] = None
|
| 35 |
+
overall_summary: Optional[str] = None
|
| 36 |
+
transcript_analysis: Optional[Dict[str, Any]] = None
|
| 37 |
+
recommendations: Optional[Dict[str, Any]] = None
|
| 38 |
+
body_language_analysis: Optional[Dict[str, Any]] = None
|
| 39 |
+
body_language_data: Optional[Dict[str, Any]] = None
|
| 40 |
+
eye_contact_analysis: Optional[Dict[str, Any]] = None
|
| 41 |
+
eye_contact_data: Optional[Dict[str, Any]] = None
|
| 42 |
+
|
| 43 |
+
class Config:
|
| 44 |
+
from_attributes = True
|
behavior_backend/app/models/token.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class Token(BaseModel):
|
| 6 |
+
"""Token schema for access token response."""
|
| 7 |
+
access_token: str
|
| 8 |
+
token_type: str
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class TokenData(BaseModel):
|
| 12 |
+
"""Token data schema for decoded JWT payload."""
|
| 13 |
+
user_id: Optional[str] = None
|
behavior_backend/app/models/user.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, EmailStr, Field
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 5 |
+
from sqlalchemy import select
|
| 6 |
+
|
| 7 |
+
class UserBase(BaseModel):
|
| 8 |
+
"""Base model for user data."""
|
| 9 |
+
email: EmailStr
|
| 10 |
+
first_name: str
|
| 11 |
+
last_name: str
|
| 12 |
+
|
| 13 |
+
class UserCreate(UserBase):
|
| 14 |
+
"""Model for creating a new user."""
|
| 15 |
+
password: str
|
| 16 |
+
|
| 17 |
+
class User(UserBase):
|
| 18 |
+
"""Model for user response."""
|
| 19 |
+
id: str
|
| 20 |
+
created_at: datetime
|
| 21 |
+
is_active: bool
|
| 22 |
+
|
| 23 |
+
class Config:
|
| 24 |
+
from_attributes = True
|
| 25 |
+
|
| 26 |
+
class UserLogin(BaseModel):
|
| 27 |
+
email: EmailStr
|
| 28 |
+
password: str
|
| 29 |
+
|
| 30 |
+
class UserOut(BaseModel):
|
| 31 |
+
id: str
|
| 32 |
+
email: EmailStr
|
| 33 |
+
first_name: str
|
| 34 |
+
last_name: str
|
| 35 |
+
|
| 36 |
+
class Config:
|
| 37 |
+
from_attributes = True
|
behavior_backend/app/models/video.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import Optional, Dict, Any, List
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
class VideoBase(BaseModel):
|
| 6 |
+
"""Base model for video data."""
|
| 7 |
+
original_filename: str
|
| 8 |
+
|
| 9 |
+
class VideoCreate(VideoBase):
|
| 10 |
+
"""Model for creating a new video."""
|
| 11 |
+
pass
|
| 12 |
+
|
| 13 |
+
class VideoMetadata(VideoBase):
|
| 14 |
+
"""Model for video metadata response."""
|
| 15 |
+
video_id: str
|
| 16 |
+
upload_date: str
|
| 17 |
+
size: int
|
| 18 |
+
status: str
|
| 19 |
+
duration: Optional[float] = None
|
| 20 |
+
video_url: Optional[str] = None
|
| 21 |
+
|
| 22 |
+
class Config:
|
| 23 |
+
from_attributes = True
|
| 24 |
+
|
| 25 |
+
class VideoAnalysisResponse(BaseModel):
|
| 26 |
+
"""Model for video analysis response."""
|
| 27 |
+
video_id: str
|
| 28 |
+
transcript: Optional[str] = None
|
| 29 |
+
emotion_analysis: Optional[Dict[str, Any]] = None
|
| 30 |
+
overall_summary: Optional[str] = None
|
| 31 |
+
transcript_analysis: Optional[Dict[str, Any]] = None
|
| 32 |
+
recommendations: Optional[Dict[str, Any]] = None
|
| 33 |
+
body_language_analysis: Optional[Dict[str, Any]] = None
|
| 34 |
+
eye_contact_analysis: Optional[Dict[str, Any]] = None
|
| 35 |
+
face_analysis_data: Optional[Dict[str, Any]] = None
|
| 36 |
+
|
| 37 |
+
class Config:
|
| 38 |
+
from_attributes = True
|
behavior_backend/app/services/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/services/processing/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/services/processing/ai_analysis.py
ADDED
|
@@ -0,0 +1,850 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import openai
|
| 6 |
+
from typing import Dict, Any, List, Optional
|
| 7 |
+
|
| 8 |
+
# Fix import paths
|
| 9 |
+
try:
|
| 10 |
+
from app.utils.logging_utils import time_it, setup_logger
|
| 11 |
+
from app.core.config import settings
|
| 12 |
+
except ImportError:
|
| 13 |
+
# Try relative imports for running from project root
|
| 14 |
+
from behavior_backend.app.utils.logging_utils import time_it, setup_logger
|
| 15 |
+
# Mock settings for testing
|
| 16 |
+
class Settings:
|
| 17 |
+
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
|
| 18 |
+
|
| 19 |
+
settings = Settings()
|
| 20 |
+
|
| 21 |
+
# Configure logging
|
| 22 |
+
logger = setup_logger(__name__)
|
| 23 |
+
|
| 24 |
+
class AIAnalysisService:
|
| 25 |
+
"""Service for AI analysis operations."""
|
| 26 |
+
|
| 27 |
+
def __init__(self):
|
| 28 |
+
"""Initialize the AI analysis service."""
|
| 29 |
+
self.client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))
|
| 30 |
+
|
| 31 |
+
@time_it
|
| 32 |
+
def analyze_emotions_and_transcript(
|
| 33 |
+
self,
|
| 34 |
+
emotion_df: pd.DataFrame,
|
| 35 |
+
transcript: str,
|
| 36 |
+
language: str = 'en',
|
| 37 |
+
interview_assessment: Optional[Dict[str, Any]] = None,
|
| 38 |
+
eye_contact_data: Optional[Dict[str, Any]] = None,
|
| 39 |
+
body_language_data: Optional[Dict[str, Any]] = None,
|
| 40 |
+
face_analysis_data: Optional[Dict[str, Any]] = None,
|
| 41 |
+
model_name: str = "gpt-4o"
|
| 42 |
+
) -> Dict[str, Any]:
|
| 43 |
+
"""
|
| 44 |
+
Analyze emotions and transcript using OpenAI.
|
| 45 |
+
|
| 46 |
+
Args:
|
| 47 |
+
emotion_df: DataFrame with emotion data
|
| 48 |
+
transcript: Transcript text
|
| 49 |
+
language: Language of the transcript
|
| 50 |
+
interview_assessment: Optional interview assessment
|
| 51 |
+
eye_contact_data: Optional eye contact analysis data
|
| 52 |
+
body_language_data: Optional body language analysis data
|
| 53 |
+
face_analysis_data: Optional face analysis data
|
| 54 |
+
model_name: The name of the model to use for AI analysis
|
| 55 |
+
Returns:
|
| 56 |
+
Dictionary with analysis results
|
| 57 |
+
"""
|
| 58 |
+
print("*******************************I AM INSIDE AI ANALYSER *******************************************************")
|
| 59 |
+
logger.info(f"Received interview assessment: {interview_assessment}")
|
| 60 |
+
logger.info(f"Received transcript: {transcript}")
|
| 61 |
+
logger.info(f"Received language: {language}")
|
| 62 |
+
logger.info(f"Received emotion_df: {emotion_df}")
|
| 63 |
+
logger.info(f"Received eye contact data: {eye_contact_data is not None}")
|
| 64 |
+
logger.info(f"Received body language data: {body_language_data is not None}")
|
| 65 |
+
logger.info(f"Received face analysis data: {face_analysis_data is not None}")
|
| 66 |
+
logger.info(f"Using AI model: {model_name}")
|
| 67 |
+
|
| 68 |
+
# Check if emotion_df is empty or None
|
| 69 |
+
if emotion_df is None or emotion_df.empty:
|
| 70 |
+
logger.warning("No emotion data available for analysis")
|
| 71 |
+
return self._generate_empty_analysis()
|
| 72 |
+
|
| 73 |
+
try:
|
| 74 |
+
# Extract raw emotion scores from the DataFrame
|
| 75 |
+
raw_emotions = {}
|
| 76 |
+
confidence_by_emotion = {}
|
| 77 |
+
average_confidence = 0
|
| 78 |
+
confidence_data = {}
|
| 79 |
+
|
| 80 |
+
# Get primary emotion data from the first row of the DataFrame
|
| 81 |
+
if not emotion_df.empty and 'raw_emotion_data' in emotion_df.columns:
|
| 82 |
+
first_row = emotion_df.iloc[0]
|
| 83 |
+
if isinstance(first_row['raw_emotion_data'], dict) and first_row['raw_emotion_data']:
|
| 84 |
+
raw_emotions = first_row['raw_emotion_data']
|
| 85 |
+
logger.info(f"Using raw_emotion_data from DataFrame: {raw_emotions}")
|
| 86 |
+
|
| 87 |
+
# Check if confidence data is available in the first row (this would be the "confidence_data" field)
|
| 88 |
+
if 'confidence_data' in emotion_df.columns and isinstance(first_row.get('confidence_data'), dict):
|
| 89 |
+
confidence_data = first_row['confidence_data']
|
| 90 |
+
confidence_by_emotion = confidence_data.get('confidence_by_emotion', {})
|
| 91 |
+
average_confidence = confidence_data.get('average_confidence', 0)
|
| 92 |
+
|
| 93 |
+
# Round confidence values to 2 decimal places
|
| 94 |
+
confidence_by_emotion = {emotion: round(value, 2) for emotion, value in confidence_by_emotion.items()}
|
| 95 |
+
average_confidence = round(average_confidence, 2)
|
| 96 |
+
|
| 97 |
+
logger.info(f"Using rounded confidence_data - confidence_by_emotion: {confidence_by_emotion}")
|
| 98 |
+
logger.info(f"Using rounded confidence_data - average_confidence: {average_confidence}")
|
| 99 |
+
|
| 100 |
+
# Store rounded values back to confidence_data for consistency
|
| 101 |
+
confidence_data['confidence_by_emotion'] = confidence_by_emotion
|
| 102 |
+
confidence_data['average_confidence'] = average_confidence
|
| 103 |
+
|
| 104 |
+
# If no raw_emotion_data found, fall back to other methods
|
| 105 |
+
if not raw_emotions:
|
| 106 |
+
logger.info("No raw_emotion_data found, trying alternative sources")
|
| 107 |
+
# First check if we have a main_face column
|
| 108 |
+
if 'main_face' in emotion_df.columns and not emotion_df.empty:
|
| 109 |
+
first_row = emotion_df.iloc[0]
|
| 110 |
+
main_face = first_row.get('main_face', {})
|
| 111 |
+
if isinstance(main_face, dict) and main_face and 'emotion' in main_face:
|
| 112 |
+
raw_emotions = main_face['emotion']
|
| 113 |
+
logger.info(f"Using emotion from main_face: {raw_emotions}")
|
| 114 |
+
|
| 115 |
+
# If still no raw emotions, try emotion_scores from first row
|
| 116 |
+
if not raw_emotions and 'emotion_scores' in emotion_df.columns and not emotion_df.empty:
|
| 117 |
+
first_row = emotion_df.iloc[0]
|
| 118 |
+
emotion_scores = first_row.get('emotion_scores', {})
|
| 119 |
+
if isinstance(emotion_scores, dict) and emotion_scores:
|
| 120 |
+
raw_emotions = emotion_scores
|
| 121 |
+
logger.info(f"Using emotion_scores from first row: {raw_emotions}")
|
| 122 |
+
|
| 123 |
+
# If still no raw emotions found, log this issue
|
| 124 |
+
if not raw_emotions:
|
| 125 |
+
logger.warning("No emotion data found in the DataFrame")
|
| 126 |
+
# Use empty dict with zero values for all emotions
|
| 127 |
+
raw_emotions = {
|
| 128 |
+
"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
|
| 129 |
+
"sad": 0, "surprise": 0, "neutral": 0
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
# Extract confidence values if available
|
| 133 |
+
average_confidence = 0
|
| 134 |
+
|
| 135 |
+
# If we have a 'confidence_by_emotion' stat available in any fashion, use it
|
| 136 |
+
if 'main_face' in emotion_df.columns and not emotion_df.empty:
|
| 137 |
+
# Calculate confidence values from dominant emotions in the data
|
| 138 |
+
confidence_values = []
|
| 139 |
+
emotion_confidence_counts = {}
|
| 140 |
+
|
| 141 |
+
for index, row in emotion_df.iterrows():
|
| 142 |
+
if 'main_face' in row and row['main_face'] and 'emotion_confidence' in row['main_face']:
|
| 143 |
+
confidence = row['main_face']['emotion_confidence']
|
| 144 |
+
emotion = row['main_face'].get('dominant_emotion', 'neutral')
|
| 145 |
+
|
| 146 |
+
# Add to average confidence
|
| 147 |
+
confidence_values.append(confidence)
|
| 148 |
+
|
| 149 |
+
# Track by emotion
|
| 150 |
+
if emotion not in emotion_confidence_counts:
|
| 151 |
+
emotion_confidence_counts[emotion] = []
|
| 152 |
+
emotion_confidence_counts[emotion].append(confidence)
|
| 153 |
+
|
| 154 |
+
# Calculate average confidence
|
| 155 |
+
if confidence_values:
|
| 156 |
+
average_confidence = sum(confidence_values) / len(confidence_values)
|
| 157 |
+
|
| 158 |
+
# Calculate average confidence by emotion
|
| 159 |
+
for emotion, confidences in emotion_confidence_counts.items():
|
| 160 |
+
if confidences:
|
| 161 |
+
confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
|
| 162 |
+
|
| 163 |
+
# If we don't have confidence values, check if we have any in first face
|
| 164 |
+
if not confidence_by_emotion and 'faces' in emotion_df.columns and not emotion_df.empty:
|
| 165 |
+
for index, row in emotion_df.iterrows():
|
| 166 |
+
if 'faces' in row and row['faces'] and len(row['faces']) > 0 and 'emotion_confidence' in row['faces'][0]:
|
| 167 |
+
confidence = row['faces'][0]['emotion_confidence']
|
| 168 |
+
emotion = row['faces'][0].get('dominant_emotion', 'neutral')
|
| 169 |
+
|
| 170 |
+
# Add to average confidence
|
| 171 |
+
if 'confidence_values' not in locals():
|
| 172 |
+
confidence_values = []
|
| 173 |
+
confidence_values.append(confidence)
|
| 174 |
+
|
| 175 |
+
# Track by emotion
|
| 176 |
+
if emotion not in emotion_confidence_counts:
|
| 177 |
+
emotion_confidence_counts = {}
|
| 178 |
+
emotion_confidence_counts[emotion] = []
|
| 179 |
+
emotion_confidence_counts[emotion].append(confidence)
|
| 180 |
+
|
| 181 |
+
# Calculate average confidence
|
| 182 |
+
if 'confidence_values' in locals() and confidence_values:
|
| 183 |
+
average_confidence = sum(confidence_values) / len(confidence_values)
|
| 184 |
+
|
| 185 |
+
# Calculate average confidence by emotion
|
| 186 |
+
for emotion, confidences in emotion_confidence_counts.items():
|
| 187 |
+
if confidences:
|
| 188 |
+
confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
|
| 189 |
+
|
| 190 |
+
# If we still don't have confidence values, use the raw emotions as proxy for confidence
|
| 191 |
+
if not confidence_by_emotion and raw_emotions:
|
| 192 |
+
# Use the raw emotion values as proxy for confidence
|
| 193 |
+
# This ensures we at least have something
|
| 194 |
+
confidence_by_emotion = {k: round(v, 2) for k, v in raw_emotions.items()}
|
| 195 |
+
dominant_emotion, max_value = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
|
| 196 |
+
average_confidence = max_value
|
| 197 |
+
|
| 198 |
+
# Format the confidence values for display
|
| 199 |
+
for emotion in confidence_by_emotion:
|
| 200 |
+
# Do not round the values to preserve the exact data
|
| 201 |
+
pass
|
| 202 |
+
|
| 203 |
+
# Add debug logging for average_confidence
|
| 204 |
+
logger.info(f"Final average_confidence value to be used in result: {average_confidence}")
|
| 205 |
+
|
| 206 |
+
# Get the original average_confidence from the confidence_data for the database
|
| 207 |
+
db_average_confidence = confidence_data.get("average_confidence", average_confidence)
|
| 208 |
+
logger.info(f"Using average_confidence from confidence_data for database: {db_average_confidence}")
|
| 209 |
+
|
| 210 |
+
# Determine overall sentiment based on the dominant emotion
|
| 211 |
+
if 'overall_sentiment' in first_row and first_row['overall_sentiment']:
|
| 212 |
+
# Use the exact sentiment from the DataFrame if available
|
| 213 |
+
sentiment = first_row['overall_sentiment']
|
| 214 |
+
logger.info(f"Using overall_sentiment from DataFrame: {sentiment}")
|
| 215 |
+
elif raw_emotions:
|
| 216 |
+
# Find the dominant emotion only if we don't have a sentiment already
|
| 217 |
+
dominant_emotion, _ = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
|
| 218 |
+
sentiment = dominant_emotion.capitalize()
|
| 219 |
+
logger.info(f"Calculated sentiment from raw_emotions: {sentiment}")
|
| 220 |
+
else:
|
| 221 |
+
# Use the standard method if no raw emotions
|
| 222 |
+
sentiment = self._determine_sentiment(raw_emotions)
|
| 223 |
+
logger.info(f"Determined sentiment via standard method: {sentiment}")
|
| 224 |
+
|
| 225 |
+
# Prepare prompt for OpenAI
|
| 226 |
+
prompt = self._generate_prompt(
|
| 227 |
+
sentiment=sentiment,
|
| 228 |
+
raw_emotions=raw_emotions,
|
| 229 |
+
confidence_by_emotion=confidence_by_emotion,
|
| 230 |
+
average_confidence=average_confidence,
|
| 231 |
+
transcript=transcript,
|
| 232 |
+
language=language,
|
| 233 |
+
interview_assessment=interview_assessment,
|
| 234 |
+
eye_contact_data=eye_contact_data,
|
| 235 |
+
body_language_data=body_language_data,
|
| 236 |
+
)
|
| 237 |
+
logger.info(f"Generated prompt: {prompt}")
|
| 238 |
+
# Call OpenAI API
|
| 239 |
+
try:
|
| 240 |
+
system_prompt = """
|
| 241 |
+
You are an expert in analyzing emotions and speech for job interviews and professional presentations.
|
| 242 |
+
You are given a transcript of a video, a summary of the emotions expressed in the video, and detailed interview assessment data when available.
|
| 243 |
+
You are also given the overall sentiment of the video.
|
| 244 |
+
You may also be provided with face analysis, eye contact analysis, and body language analysis.
|
| 245 |
+
You are to analyze all provided data and provide a comprehensive analysis in JSON format.
|
| 246 |
+
Your evaluation must be based on the transcript, emotions expressed, interview assessment data, face analysis, eye contact analysis, and body language analysis (when provided).
|
| 247 |
+
You are to provide a detailed analysis, including:
|
| 248 |
+
- Key points from the transcript
|
| 249 |
+
- Language quality assessment
|
| 250 |
+
- Confidence indicators
|
| 251 |
+
- Overall assessment of the performance including body language, eye contact, and professional appearance
|
| 252 |
+
- Recommendations for improving emotional expression, communication, body language, and professional appearance
|
| 253 |
+
|
| 254 |
+
Please provide a comprehensive analysis in JSON format with the following structure:
|
| 255 |
+
{
|
| 256 |
+
"Transcript Analysis": {
|
| 257 |
+
"Key Points": List of key points as bullet points <ul>...</ul> in HTML format from the transcript with critical insight for an HR manager. Use bold <b>...</b> tags to highlight important points.
|
| 258 |
+
"Language Quality": Bullet points <ul>...</ul> in HTML format of assessment of language use, vocabulary,grammar mistakes, clarity, professionalism, and other language-related metrics. Use bold <b>...</b> tags to highlight important points.
|
| 259 |
+
"Confidence Indicators": Bullet points <ul>...</ul> in HTML format of analysis of confidence based on language.
|
| 260 |
+
},
|
| 261 |
+
"Body Language Analysis": {
|
| 262 |
+
"Eye Contact": Analysis of eye contact patterns in HTML format based on the interview assessment data.
|
| 263 |
+
"Posture and Movement": Analysis of posture, movement, and other body language indicators in HTML format.
|
| 264 |
+
"Overall Body Language": Summary assessment of body language in HTML format.
|
| 265 |
+
},
|
| 266 |
+
"Overall Summary": overall assessment of the candidate interview performance with critical insight for an HR manager. Use a chain of thought approach to analyze all available data and provide a comprehensive analysis. Write in HTML and highlight important points with bold <b>...</b> tags.
|
| 267 |
+
"Recommendations": {
|
| 268 |
+
"Emotional Expression": bullet points <ul>...</ul> in HTML format of recommendations for improving emotional expression using bold <b>...</b> tags.
|
| 269 |
+
"Communication": bullet points <ul>...</ul> in HTML format of recommendations for improving communication using bold <b>...</b> tags.
|
| 270 |
+
"Body Language": bullet points <ul>...</ul> in HTML format of specific recommendations for improving body language based on the assessment data using bold <b>...</b> tags.
|
| 271 |
+
"Professional Appearance": bullet points <ul>...</ul> in HTML format of specific recommendations for improving professional appearance using bold <b>...</b> tags.
|
| 272 |
+
}
|
| 273 |
+
}
|
| 274 |
+
"""
|
| 275 |
+
|
| 276 |
+
response = self.client.chat.completions.create(
|
| 277 |
+
model=model_name,
|
| 278 |
+
messages=[
|
| 279 |
+
{"role": "system", "content": system_prompt},
|
| 280 |
+
{"role": "user", "content": prompt}
|
| 281 |
+
],
|
| 282 |
+
temperature=0.7,
|
| 283 |
+
max_tokens=2500,
|
| 284 |
+
frequency_penalty=0,
|
| 285 |
+
presence_penalty=0.2
|
| 286 |
+
)
|
| 287 |
+
|
| 288 |
+
analysis_text = response.choices[0].message.content.strip()
|
| 289 |
+
|
| 290 |
+
# Parse the JSON response
|
| 291 |
+
try:
|
| 292 |
+
analysis = json.loads(analysis_text)
|
| 293 |
+
logger.info("Successfully parsed the OpenAI response")
|
| 294 |
+
except Exception as parse_error:
|
| 295 |
+
logger.error(f"Failed to parse OpenAI response as JSON: {str(parse_error)}")
|
| 296 |
+
logger.info(f"Response content: {analysis_text}")
|
| 297 |
+
analysis = self._extract_json_from_text(analysis_text)
|
| 298 |
+
|
| 299 |
+
if not analysis:
|
| 300 |
+
logger.warning("Returning standard analysis structure with error message")
|
| 301 |
+
analysis = self._generate_empty_analysis()
|
| 302 |
+
analysis["Error"] = "Failed to parse OpenAI response"
|
| 303 |
+
|
| 304 |
+
# Add raw emotion data to the analysis for consistency with database storage
|
| 305 |
+
analysis["Emotion Analysis"] = {
|
| 306 |
+
"Dominant Emotions": raw_emotions,
|
| 307 |
+
"Confidence By Emotion": confidence_by_emotion,
|
| 308 |
+
"Overall Sentiment": sentiment,
|
| 309 |
+
"Average Confidence": db_average_confidence
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
# Add eye contact and body language data directly to the analysis
|
| 313 |
+
# to ensure it's preserved in the returned JSON, using the same keys
|
| 314 |
+
# as in the video_processor.py when it creates comprehensive_results
|
| 315 |
+
if eye_contact_data:
|
| 316 |
+
# Use lowercase key to match video_processor.py
|
| 317 |
+
key = "eye_contact_analysis"
|
| 318 |
+
analysis[key] = eye_contact_data
|
| 319 |
+
logger.info(f"Added {key} to results with {len(str(eye_contact_data))} characters")
|
| 320 |
+
|
| 321 |
+
if body_language_data:
|
| 322 |
+
# Use lowercase key to match video_processor.py
|
| 323 |
+
key = "body_language_analysis"
|
| 324 |
+
analysis[key] = body_language_data
|
| 325 |
+
logger.info(f"Added {key} to results with {len(str(body_language_data))} characters")
|
| 326 |
+
|
| 327 |
+
if face_analysis_data:
|
| 328 |
+
# Use lowercase key to match video_processor.py
|
| 329 |
+
key = "face_analysis"
|
| 330 |
+
analysis[key] = face_analysis_data
|
| 331 |
+
logger.info(f"Added {key} to results with {len(str(face_analysis_data))} characters")
|
| 332 |
+
|
| 333 |
+
# Log the exact emotion analysis that will be stored in the database
|
| 334 |
+
logger.info(f"Emotion Analysis to be stored in database: {analysis['Emotion Analysis']}")
|
| 335 |
+
logger.info(f"Added eye_contact_analysis to results: {bool(eye_contact_data)}")
|
| 336 |
+
logger.info(f"Added body_language_analysis to results: {bool(body_language_data)}")
|
| 337 |
+
logger.info(f"Added face_analysis to results: {bool(face_analysis_data)}")
|
| 338 |
+
|
| 339 |
+
return analysis
|
| 340 |
+
|
| 341 |
+
except Exception as api_error:
|
| 342 |
+
logger.error(f"Error during OpenAI API call: {str(api_error)}")
|
| 343 |
+
analysis = self._generate_empty_analysis()
|
| 344 |
+
analysis["Error"] = f"OpenAI API error: {str(api_error)}"
|
| 345 |
+
|
| 346 |
+
# Still include the emotion data for consistency
|
| 347 |
+
analysis["Emotion Analysis"] = {
|
| 348 |
+
"Dominant Emotions": raw_emotions,
|
| 349 |
+
"Confidence By Emotion": confidence_by_emotion,
|
| 350 |
+
"Overall Sentiment": sentiment,
|
| 351 |
+
"Average Confidence": db_average_confidence
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
# Also include eye contact and body language data in error cases
|
| 355 |
+
if eye_contact_data:
|
| 356 |
+
key = "eye_contact_analysis"
|
| 357 |
+
analysis[key] = eye_contact_data
|
| 358 |
+
logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
|
| 359 |
+
|
| 360 |
+
if body_language_data:
|
| 361 |
+
key = "body_language_analysis"
|
| 362 |
+
analysis[key] = body_language_data
|
| 363 |
+
logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
|
| 364 |
+
|
| 365 |
+
if face_analysis_data:
|
| 366 |
+
key = "face_analysis"
|
| 367 |
+
analysis[key] = face_analysis_data
|
| 368 |
+
logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
|
| 369 |
+
|
| 370 |
+
return analysis
|
| 371 |
+
|
| 372 |
+
except Exception as e:
|
| 373 |
+
logger.error(f"Error during analysis: {str(e)}")
|
| 374 |
+
analysis = self._generate_empty_analysis()
|
| 375 |
+
analysis["Error"] = f"Analysis error: {str(e)}"
|
| 376 |
+
|
| 377 |
+
# Also include eye contact and body language data in error cases
|
| 378 |
+
if eye_contact_data:
|
| 379 |
+
key = "eye_contact_analysis"
|
| 380 |
+
analysis[key] = eye_contact_data
|
| 381 |
+
logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
|
| 382 |
+
|
| 383 |
+
if body_language_data:
|
| 384 |
+
key = "body_language_analysis"
|
| 385 |
+
analysis[key] = body_language_data
|
| 386 |
+
logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
|
| 387 |
+
|
| 388 |
+
if face_analysis_data:
|
| 389 |
+
key = "face_analysis"
|
| 390 |
+
analysis[key] = face_analysis_data
|
| 391 |
+
logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
|
| 392 |
+
|
| 393 |
+
return analysis
|
| 394 |
+
|
| 395 |
+
def _calculate_emotion_percentages(self, emotion_df: pd.DataFrame) -> Dict[str, float]:
|
| 396 |
+
"""
|
| 397 |
+
Calculate percentages of different emotion categories based on raw emotion scores.
|
| 398 |
+
|
| 399 |
+
Args:
|
| 400 |
+
emotion_df: DataFrame with emotion data
|
| 401 |
+
|
| 402 |
+
Returns:
|
| 403 |
+
Dictionary with emotion percentages for each emotion and grouped categories
|
| 404 |
+
"""
|
| 405 |
+
# Early return for empty DataFrame
|
| 406 |
+
if emotion_df is None or emotion_df.empty:
|
| 407 |
+
return {
|
| 408 |
+
"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
|
| 409 |
+
"sad": 0, "surprise": 0, "neutral": 0,
|
| 410 |
+
"positive": 0, "negative": 0
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
# Define emotion categories
|
| 414 |
+
all_emotions = {'angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'}
|
| 415 |
+
positive_emotions = {'happy', 'surprise'}
|
| 416 |
+
negative_emotions = {'angry', 'disgust', 'fear', 'sad'}
|
| 417 |
+
neutral_emotions = {'neutral'}
|
| 418 |
+
|
| 419 |
+
# Initialize counters for raw emotion scores
|
| 420 |
+
emotion_totals = {emotion: 0 for emotion in all_emotions}
|
| 421 |
+
total_score = 0
|
| 422 |
+
|
| 423 |
+
# Process each row's emotion scores
|
| 424 |
+
for _, row in emotion_df.iterrows():
|
| 425 |
+
# Try to get emotion scores from the row
|
| 426 |
+
emotion_scores = {}
|
| 427 |
+
|
| 428 |
+
# First check if we have raw emotion scores in the DataFrame
|
| 429 |
+
if 'emotion_scores' in row and row['emotion_scores']:
|
| 430 |
+
emotion_scores = row['emotion_scores']
|
| 431 |
+
|
| 432 |
+
# If no scores found, try to use the dominant emotion and confidence
|
| 433 |
+
if not emotion_scores and 'dominant_emotion' in row and 'emotion_confidence' in row:
|
| 434 |
+
emotion = row['dominant_emotion']
|
| 435 |
+
confidence = row['emotion_confidence']
|
| 436 |
+
if emotion != 'unknown' and confidence > 0:
|
| 437 |
+
emotion_scores = {emotion: confidence}
|
| 438 |
+
|
| 439 |
+
# Skip if no emotion data
|
| 440 |
+
if not emotion_scores:
|
| 441 |
+
continue
|
| 442 |
+
|
| 443 |
+
# Sum up scores by emotion
|
| 444 |
+
for emotion, score in emotion_scores.items():
|
| 445 |
+
total_score += score
|
| 446 |
+
if emotion in emotion_totals:
|
| 447 |
+
emotion_totals[emotion] += score
|
| 448 |
+
|
| 449 |
+
# Calculate percentages for each emotion
|
| 450 |
+
emotion_percentages = {}
|
| 451 |
+
if total_score > 0:
|
| 452 |
+
for emotion, total in emotion_totals.items():
|
| 453 |
+
emotion_percentages[emotion] = round((total / total_score) * 100, 2)
|
| 454 |
+
|
| 455 |
+
# Add grouped percentages
|
| 456 |
+
positive_total = sum(emotion_totals.get(emotion, 0) for emotion in positive_emotions)
|
| 457 |
+
negative_total = sum(emotion_totals.get(emotion, 0) for emotion in negative_emotions)
|
| 458 |
+
neutral_total = sum(emotion_totals.get(emotion, 0) for emotion in neutral_emotions)
|
| 459 |
+
|
| 460 |
+
emotion_percentages.update({
|
| 461 |
+
"positive": round((positive_total / total_score) * 100, 2),
|
| 462 |
+
"negative": round((negative_total / total_score) * 100, 2)
|
| 463 |
+
})
|
| 464 |
+
else:
|
| 465 |
+
# Return zeros if no data
|
| 466 |
+
emotion_percentages = {
|
| 467 |
+
"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
|
| 468 |
+
"sad": 0, "surprise": 0, "neutral": 0,
|
| 469 |
+
"positive": 0, "negative": 0
|
| 470 |
+
}
|
| 471 |
+
|
| 472 |
+
return emotion_percentages
|
| 473 |
+
|
| 474 |
+
def _determine_sentiment(self, emotion_percentages: Dict[str, float]) -> str:
|
| 475 |
+
"""
|
| 476 |
+
Determine overall sentiment based on emotion percentages.
|
| 477 |
+
|
| 478 |
+
Args:
|
| 479 |
+
emotion_percentages: Dictionary with emotion percentages
|
| 480 |
+
|
| 481 |
+
Returns:
|
| 482 |
+
Sentiment assessment string
|
| 483 |
+
"""
|
| 484 |
+
# First try to determine sentiment from individual emotions
|
| 485 |
+
individual_emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
|
| 486 |
+
|
| 487 |
+
# Find the dominant individual emotion
|
| 488 |
+
max_emotion = None
|
| 489 |
+
max_score = -1
|
| 490 |
+
|
| 491 |
+
for emotion in individual_emotions:
|
| 492 |
+
if emotion in emotion_percentages and emotion_percentages[emotion] > max_score:
|
| 493 |
+
max_score = emotion_percentages[emotion]
|
| 494 |
+
max_emotion = emotion
|
| 495 |
+
|
| 496 |
+
# If we found a dominant individual emotion with significant percentage, use it
|
| 497 |
+
if max_emotion and max_score > 30:
|
| 498 |
+
return max_emotion.capitalize()
|
| 499 |
+
|
| 500 |
+
# Otherwise, fall back to category-based sentiment
|
| 501 |
+
positive = emotion_percentages.get("positive", 0)
|
| 502 |
+
negative = emotion_percentages.get("negative", 0)
|
| 503 |
+
neutral = emotion_percentages.get("neutral", 0)
|
| 504 |
+
|
| 505 |
+
# Use lookup table for thresholds
|
| 506 |
+
if positive > 60:
|
| 507 |
+
return "Very Positive"
|
| 508 |
+
if positive > 40:
|
| 509 |
+
return "Positive"
|
| 510 |
+
if negative > 60:
|
| 511 |
+
return "Very Negative"
|
| 512 |
+
if negative > 40:
|
| 513 |
+
return "Negative"
|
| 514 |
+
if neutral > 60:
|
| 515 |
+
return "Very Neutral"
|
| 516 |
+
if neutral > 40:
|
| 517 |
+
return "Neutral"
|
| 518 |
+
|
| 519 |
+
# Find dominant category
|
| 520 |
+
max_category = max(
|
| 521 |
+
("positive", positive),
|
| 522 |
+
("negative", negative),
|
| 523 |
+
("neutral", neutral),
|
| 524 |
+
key=lambda x: x[1]
|
| 525 |
+
)
|
| 526 |
+
|
| 527 |
+
# Map dominant category to sentiment
|
| 528 |
+
sentiment_map = {
|
| 529 |
+
"positive": "Slightly Positive",
|
| 530 |
+
"negative": "Slightly Negative",
|
| 531 |
+
"neutral": "Mixed" # Default case
|
| 532 |
+
}
|
| 533 |
+
|
| 534 |
+
return sentiment_map.get(max_category[0], "Mixed")
|
| 535 |
+
|
| 536 |
+
def _generate_prompt(
|
| 537 |
+
self,
|
| 538 |
+
sentiment: str,
|
| 539 |
+
raw_emotions: Dict[str, float],
|
| 540 |
+
confidence_by_emotion: Dict[str, float],
|
| 541 |
+
average_confidence: float,
|
| 542 |
+
transcript: str,
|
| 543 |
+
language: str = 'en',
|
| 544 |
+
interview_assessment: Optional[Dict[str, Any]] = None,
|
| 545 |
+
eye_contact_data: Optional[Dict[str, Any]] = None,
|
| 546 |
+
body_language_data: Optional[Dict[str, Any]] = None,
|
| 547 |
+
face_analysis_data: Optional[Dict[str, Any]] = None
|
| 548 |
+
) -> str:
|
| 549 |
+
"""
|
| 550 |
+
Generate a prompt for the AI model.
|
| 551 |
+
|
| 552 |
+
Args:
|
| 553 |
+
sentiment: Dominant sentiment
|
| 554 |
+
raw_emotions: Raw emotion scores
|
| 555 |
+
confidence_by_emotion: Confidence scores by emotion
|
| 556 |
+
average_confidence: Average confidence
|
| 557 |
+
transcript: Transcript text
|
| 558 |
+
language: Language of the transcript
|
| 559 |
+
interview_assessment: Optional interview assessment
|
| 560 |
+
eye_contact_data: Optional eye contact analysis data
|
| 561 |
+
body_language_data: Optional body language analysis data
|
| 562 |
+
face_analysis_data: Optional face analysis data
|
| 563 |
+
|
| 564 |
+
Returns:
|
| 565 |
+
Prompt for the AI model
|
| 566 |
+
"""
|
| 567 |
+
# Format the emotion data
|
| 568 |
+
emotions_str = ", ".join([f"{emotion}: {value:.1f}%" for emotion, value in raw_emotions.items()])
|
| 569 |
+
confidence_str = ", ".join([f"{emotion}: {value:.2f}" for emotion, value in confidence_by_emotion.items()])
|
| 570 |
+
|
| 571 |
+
# Include eye contact analysis if available
|
| 572 |
+
eye_contact_str = ""
|
| 573 |
+
if eye_contact_data:
|
| 574 |
+
ec_stats = eye_contact_data.get("eye_contact_stats", {})
|
| 575 |
+
ec_assessment = eye_contact_data.get("assessment", {})
|
| 576 |
+
if ec_stats and ec_assessment:
|
| 577 |
+
eye_contact_str = f"""
|
| 578 |
+
Eye Contact Analysis:
|
| 579 |
+
- Eye contact percentage: {ec_stats.get('eye_contact_percentage', 0):.1f}%
|
| 580 |
+
- Eye contact duration: {ec_stats.get('eye_contact_duration_seconds', 0):.1f} seconds
|
| 581 |
+
- Longest eye contact: {ec_stats.get('longest_eye_contact_seconds', 0):.1f} seconds
|
| 582 |
+
- Average contact duration: {ec_stats.get('average_contact_duration_seconds', 0):.1f} seconds
|
| 583 |
+
- Contact episodes: {ec_stats.get('contact_episodes', 0)}
|
| 584 |
+
- Assessment score: {ec_assessment.get('score', 0)}/10
|
| 585 |
+
- Key patterns: {', '.join(ec_assessment.get('patterns', []))}
|
| 586 |
+
"""
|
| 587 |
+
|
| 588 |
+
# Include body language analysis if available
|
| 589 |
+
body_language_str = ""
|
| 590 |
+
if body_language_data:
|
| 591 |
+
bl_stats = body_language_data.get("body_language_stats", {})
|
| 592 |
+
bl_assessment = body_language_data.get("assessment", {})
|
| 593 |
+
if bl_stats and bl_assessment:
|
| 594 |
+
body_language_str = f"""
|
| 595 |
+
Body Language Analysis:
|
| 596 |
+
- Shoulder misalignment percentage: {bl_stats.get('shoulder_misalignment_percentage', 0):.1f}%
|
| 597 |
+
- Leaning forward percentage: {bl_stats.get('leaning_forward_percentage', 0):.1f}%
|
| 598 |
+
- Head tilt percentage: {bl_stats.get('head_tilt_percentage', 0):.1f}%
|
| 599 |
+
- Arms crossed percentage: {bl_stats.get('arms_crossed_percentage', 0):.1f}%
|
| 600 |
+
- Self-touch percentage: {bl_stats.get('self_touch_percentage', 0):.1f}%
|
| 601 |
+
- Fidgeting percentage: {bl_stats.get('fidgeting_percentage', 0):.1f}%
|
| 602 |
+
- Pose shifts per minute: {bl_stats.get('pose_shifts_per_minute', 0):.1f}
|
| 603 |
+
- Confidence score: {bl_assessment.get('confidence_score', 0)}/10
|
| 604 |
+
- Engagement score: {bl_assessment.get('engagement_score', 0)}/10
|
| 605 |
+
- Comfort score: {bl_assessment.get('comfort_score', 0)}/10
|
| 606 |
+
- Overall score: {bl_assessment.get('overall_score', 0)}/10
|
| 607 |
+
"""
|
| 608 |
+
|
| 609 |
+
# Include face analysis if available
|
| 610 |
+
face_analysis_str = ""
|
| 611 |
+
if face_analysis_data:
|
| 612 |
+
face_analysis_str = f"""
|
| 613 |
+
Face Analysis:
|
| 614 |
+
- Professional Impression: {face_analysis_data.get('professionalImpression', 'No data')}
|
| 615 |
+
- Attire Assessment: {face_analysis_data.get('attireAssessment', 'No data')}
|
| 616 |
+
- Facial Expression: {face_analysis_data.get('facialExpressionAnalysis', 'No data')}
|
| 617 |
+
- Background Assessment: {face_analysis_data.get('backgroundAssessment', 'No data')}
|
| 618 |
+
- Personality Indicators: {face_analysis_data.get('personalityIndicators', 'No data')}
|
| 619 |
+
- Recommendations: {face_analysis_data.get('recommendationsForImprovement', 'No data')}
|
| 620 |
+
- Overall Score: {face_analysis_data.get('overallScore', 0)}/10
|
| 621 |
+
"""
|
| 622 |
+
|
| 623 |
+
# Format the interview assessment if available
|
| 624 |
+
interview_str = ""
|
| 625 |
+
if interview_assessment:
|
| 626 |
+
interview_str = f"""
|
| 627 |
+
Interview Assessment:
|
| 628 |
+
{json.dumps(interview_assessment, indent=2)}
|
| 629 |
+
"""
|
| 630 |
+
|
| 631 |
+
# Create the prompt with different instructions based on language
|
| 632 |
+
if language.lower() in ['en', 'eng', 'english']:
|
| 633 |
+
prompt = f"""
|
| 634 |
+
You are an expert in analyzing human emotions, body language, and eye contact in video interviews. Based on the transcript and emotional data provided, provide a comprehensive analysis of the interview.
|
| 635 |
+
|
| 636 |
+
Emotion Analysis:
|
| 637 |
+
Dominant emotion: {sentiment}
|
| 638 |
+
Emotion breakdown: {emotions_str}
|
| 639 |
+
Confidence by emotion: {confidence_str}
|
| 640 |
+
Average confidence: {average_confidence:.2f}
|
| 641 |
+
|
| 642 |
+
{eye_contact_str}
|
| 643 |
+
|
| 644 |
+
{body_language_str}
|
| 645 |
+
|
| 646 |
+
{face_analysis_str}
|
| 647 |
+
|
| 648 |
+
{interview_str}
|
| 649 |
+
|
| 650 |
+
Transcript:
|
| 651 |
+
{transcript}
|
| 652 |
+
|
| 653 |
+
Provide a comprehensive analysis with the following sections:
|
| 654 |
+
1. Emotion Analysis: Analyze the emotions detected in the video.
|
| 655 |
+
2. Transcript Analysis: Analyze the content of the transcript, key themes, and topics discussed.
|
| 656 |
+
3. Body Language Analysis: If body language data is available, analyze the body language observed.
|
| 657 |
+
4. Eye Contact Analysis: If eye contact data is available, analyze the eye contact patterns.
|
| 658 |
+
5. Face Analysis: If face analysis data is available, analyze the professional appearance, attire, and background.
|
| 659 |
+
6. Overall Summary: Provide a holistic view of the interview performance.
|
| 660 |
+
7. Recommendations: Suggest improvements for future interviews.
|
| 661 |
+
|
| 662 |
+
Format your response as a structured JSON with the following keys:
|
| 663 |
+
{{
|
| 664 |
+
"Emotion Analysis": {{ detailed analysis }},
|
| 665 |
+
"Transcript Analysis": {{ detailed analysis }},
|
| 666 |
+
"Body Language Analysis": {{ detailed analysis, if data is available }},
|
| 667 |
+
"Eye Contact Analysis": {{ detailed analysis, if data is available }},
|
| 668 |
+
"Face Analysis": {{ detailed analysis, if data is available }},
|
| 669 |
+
"Overall Summary": "summary text",
|
| 670 |
+
"Recommendations": {{ recommendations }}
|
| 671 |
+
}}
|
| 672 |
+
"""
|
| 673 |
+
else:
|
| 674 |
+
# Simplified prompt for other languages
|
| 675 |
+
prompt = f"""
|
| 676 |
+
Analyze the following transcript and emotion data.
|
| 677 |
+
|
| 678 |
+
Emotion data: {sentiment}, {emotions_str}
|
| 679 |
+
|
| 680 |
+
{eye_contact_str}
|
| 681 |
+
|
| 682 |
+
{body_language_str}
|
| 683 |
+
|
| 684 |
+
{face_analysis_str}
|
| 685 |
+
|
| 686 |
+
{interview_str}
|
| 687 |
+
|
| 688 |
+
Transcript: {transcript}
|
| 689 |
+
|
| 690 |
+
Provide a summary of the content and emotional state, formatted as JSON.
|
| 691 |
+
"""
|
| 692 |
+
|
| 693 |
+
return prompt
|
| 694 |
+
|
| 695 |
+
def _generate_empty_analysis(self) -> Dict[str, Any]:
|
| 696 |
+
"""
|
| 697 |
+
Generate empty analysis when no data is available.
|
| 698 |
+
|
| 699 |
+
Returns:
|
| 700 |
+
Empty analysis dictionary
|
| 701 |
+
"""
|
| 702 |
+
return {
|
| 703 |
+
"Emotion Analysis": {
|
| 704 |
+
"Dominant Emotions": {
|
| 705 |
+
"angry": 0,
|
| 706 |
+
"disgust": 0,
|
| 707 |
+
"fear": 0,
|
| 708 |
+
"happy": 0,
|
| 709 |
+
"sad": 0,
|
| 710 |
+
"surprise": 0,
|
| 711 |
+
"neutral": 0
|
| 712 |
+
},
|
| 713 |
+
"Confidence By Emotion": {
|
| 714 |
+
"angry": 0,
|
| 715 |
+
"disgust": 0,
|
| 716 |
+
"fear": 0,
|
| 717 |
+
"happy": 0,
|
| 718 |
+
"sad": 0,
|
| 719 |
+
"surprise": 0,
|
| 720 |
+
"neutral": 0
|
| 721 |
+
},
|
| 722 |
+
"Overall Sentiment": "No emotions detected",
|
| 723 |
+
"Average Confidence": 0
|
| 724 |
+
},
|
| 725 |
+
"Transcript Analysis": {
|
| 726 |
+
"Key Points": [],
|
| 727 |
+
"Language Quality": "No transcript available",
|
| 728 |
+
"Confidence Indicators": []
|
| 729 |
+
},
|
| 730 |
+
"Body Language Analysis": {
|
| 731 |
+
"Eye Contact": "No data available",
|
| 732 |
+
"Posture and Movement": "No data available",
|
| 733 |
+
"Overall Body Language": "No data available"
|
| 734 |
+
},
|
| 735 |
+
"Overall Summary": "No data available for analysis",
|
| 736 |
+
"Recommendations": {
|
| 737 |
+
"Emotional Expression": "No recommendations available",
|
| 738 |
+
"Communication": "No recommendations available",
|
| 739 |
+
"Body Language": "No recommendations available",
|
| 740 |
+
"Professional Appearance": "No recommendations available"
|
| 741 |
+
}
|
| 742 |
+
}
|
| 743 |
+
|
| 744 |
+
def _extract_json_from_text(self, text: str) -> Dict[str, Any]:
|
| 745 |
+
"""
|
| 746 |
+
Extract JSON from a text string that might contain other content.
|
| 747 |
+
|
| 748 |
+
Args:
|
| 749 |
+
text: The text to extract JSON from
|
| 750 |
+
|
| 751 |
+
Returns:
|
| 752 |
+
Extracted JSON as dict, or empty dict if extraction fails
|
| 753 |
+
"""
|
| 754 |
+
try:
|
| 755 |
+
# First try to parse the entire text as JSON
|
| 756 |
+
return json.loads(text)
|
| 757 |
+
except json.JSONDecodeError:
|
| 758 |
+
# If that fails, try to find JSON-like content
|
| 759 |
+
try:
|
| 760 |
+
# Check if text starts with markdown code block
|
| 761 |
+
if text.strip().startswith("```json"):
|
| 762 |
+
# Extract content between the markdown delimiters
|
| 763 |
+
parts = text.split("```")
|
| 764 |
+
if len(parts) >= 3: # At least opening and closing backticks with content between
|
| 765 |
+
# Get the content after the first ``` and before the next ```
|
| 766 |
+
json_str = parts[1]
|
| 767 |
+
# Remove "json" language identifier if present
|
| 768 |
+
json_str = json_str.replace("json", "", 1).strip()
|
| 769 |
+
# Try to parse the extracted JSON
|
| 770 |
+
return json.loads(json_str)
|
| 771 |
+
elif text.strip().startswith("```"):
|
| 772 |
+
# Similar handling for code blocks without language specification
|
| 773 |
+
parts = text.split("```")
|
| 774 |
+
if len(parts) >= 3:
|
| 775 |
+
json_str = parts[1].strip()
|
| 776 |
+
return json.loads(json_str)
|
| 777 |
+
|
| 778 |
+
# Find the first opening brace and the last closing brace
|
| 779 |
+
json_start = text.find('{')
|
| 780 |
+
json_end = text.rfind('}') + 1
|
| 781 |
+
|
| 782 |
+
if json_start >= 0 and json_end > json_start:
|
| 783 |
+
json_str = text[json_start:json_end]
|
| 784 |
+
# Try to parse the extracted JSON
|
| 785 |
+
return json.loads(json_str)
|
| 786 |
+
|
| 787 |
+
# If no braces found, look for markdown code blocks elsewhere in the text
|
| 788 |
+
if "```json" in text or "```" in text:
|
| 789 |
+
# Try to extract from code blocks
|
| 790 |
+
lines = text.split("\n")
|
| 791 |
+
start_line = -1
|
| 792 |
+
end_line = -1
|
| 793 |
+
|
| 794 |
+
for i, line in enumerate(lines):
|
| 795 |
+
if "```json" in line or line.strip() == "```":
|
| 796 |
+
if start_line == -1:
|
| 797 |
+
start_line = i
|
| 798 |
+
else:
|
| 799 |
+
end_line = i
|
| 800 |
+
break
|
| 801 |
+
|
| 802 |
+
if start_line != -1 and end_line != -1:
|
| 803 |
+
# Extract content between markdown delimiters
|
| 804 |
+
json_content = "\n".join(lines[start_line+1:end_line])
|
| 805 |
+
# Clean up and parse
|
| 806 |
+
json_content = json_content.replace("json", "", 1).strip()
|
| 807 |
+
return json.loads(json_content)
|
| 808 |
+
except Exception as e:
|
| 809 |
+
logger.error(f"Error extracting JSON from text: {str(e)}")
|
| 810 |
+
|
| 811 |
+
# If all extraction attempts fail, return empty dict
|
| 812 |
+
return {}
|
| 813 |
+
|
| 814 |
+
def _format_confidence_values(self, raw_emotions: Dict[str, float], confidence_by_emotion: Dict[str, float]) -> Dict[str, float]:
|
| 815 |
+
"""
|
| 816 |
+
Format the confidence values to match what's expected in the database.
|
| 817 |
+
|
| 818 |
+
Args:
|
| 819 |
+
raw_emotions: Raw emotion data
|
| 820 |
+
confidence_by_emotion: Confidence values by emotion
|
| 821 |
+
|
| 822 |
+
Returns:
|
| 823 |
+
Formatted confidence values
|
| 824 |
+
"""
|
| 825 |
+
# First check if we have proper confidence values from confidence_by_emotion
|
| 826 |
+
if confidence_by_emotion and any(val > 0 for val in confidence_by_emotion.values()):
|
| 827 |
+
logger.info(f"Using provided confidence values: {confidence_by_emotion}")
|
| 828 |
+
# Ensure values are properly formatted
|
| 829 |
+
return {emotion: round(confidence, 2) for emotion, confidence in confidence_by_emotion.items()}
|
| 830 |
+
else:
|
| 831 |
+
# No valid confidence values found, log this fact
|
| 832 |
+
logger.warning("No valid confidence values found, using raw emotions as proxy for confidence")
|
| 833 |
+
# Use the raw emotions as proxy for confidence (this was the source of the issue)
|
| 834 |
+
return {emotion: round(value, 2) for emotion, value in raw_emotions.items()}
|
| 835 |
+
|
| 836 |
+
def _get_dominant_confidence(self, raw_emotions: Dict[str, float], average_confidence: float) -> float:
|
| 837 |
+
"""
|
| 838 |
+
Get the confidence value of the dominant emotion.
|
| 839 |
+
|
| 840 |
+
Args:
|
| 841 |
+
raw_emotions: Raw emotion data
|
| 842 |
+
average_confidence: Average confidence value from the data
|
| 843 |
+
|
| 844 |
+
Returns:
|
| 845 |
+
Dominant emotion confidence
|
| 846 |
+
"""
|
| 847 |
+
# Simply return the provided average_confidence
|
| 848 |
+
# This method is maintained for backward compatibility
|
| 849 |
+
logger.info(f"Using average confidence: {average_confidence}")
|
| 850 |
+
return round(average_confidence, 2)
|
behavior_backend/app/services/processing/ai_face_analyzer.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import base64
|
| 3 |
+
import logging
|
| 4 |
+
from typing import List, Dict, Any, Optional, Union
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
from langchain_openai import ChatOpenAI
|
| 9 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 10 |
+
from langchain_anthropic import ChatAnthropic
|
| 11 |
+
from langchain_groq import ChatGroq
|
| 12 |
+
# Handle langchain_google_genai import error
|
| 13 |
+
try:
|
| 14 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 15 |
+
GEMINI_AVAILABLE = True
|
| 16 |
+
except ImportError:
|
| 17 |
+
# Gemini model is not available
|
| 18 |
+
ChatGoogleGenerativeAI = None
|
| 19 |
+
GEMINI_AVAILABLE = False
|
| 20 |
+
from langchain_core.messages.base import BaseMessage
|
| 21 |
+
|
| 22 |
+
# Fix import paths
|
| 23 |
+
try:
|
| 24 |
+
from app.utils.logging_utils import time_it, setup_logger
|
| 25 |
+
from app.core.config import settings
|
| 26 |
+
except ImportError:
|
| 27 |
+
# Try relative imports for running from project root
|
| 28 |
+
from behavior_backend.app.utils.logging_utils import time_it, setup_logger
|
| 29 |
+
# Mock settings for testing
|
| 30 |
+
class Settings:
|
| 31 |
+
def __init__(self):
|
| 32 |
+
self.OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
|
| 33 |
+
self.ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
|
| 34 |
+
self.GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
|
| 35 |
+
self.GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 36 |
+
|
| 37 |
+
settings = Settings()
|
| 38 |
+
|
| 39 |
+
# Configure logging
|
| 40 |
+
logger = setup_logger(__name__)
|
| 41 |
+
|
| 42 |
+
class AIFaceAnalyzer:
|
| 43 |
+
"""Service for analyzing candidate profile pictures using Visual LLMs."""
|
| 44 |
+
|
| 45 |
+
def __init__(self, provider: str = "openai"):
|
| 46 |
+
"""
|
| 47 |
+
Initialize the AI face analyzer service.
|
| 48 |
+
|
| 49 |
+
Args:
|
| 50 |
+
provider: The LLM provider to use ('openai', 'anthropic', 'groq', or 'gemini')
|
| 51 |
+
"""
|
| 52 |
+
# If Gemini isn't available and that's the requested provider, fall back to OpenAI
|
| 53 |
+
if provider.lower() == "gemini" and not GEMINI_AVAILABLE:
|
| 54 |
+
logger.warning("Gemini provider requested but langchain_google_genai is not installed. Falling back to OpenAI.")
|
| 55 |
+
provider = "openai"
|
| 56 |
+
|
| 57 |
+
self.provider = provider.lower()
|
| 58 |
+
self._init_model()
|
| 59 |
+
|
| 60 |
+
def _init_model(self):
|
| 61 |
+
"""Initialize the LLM model based on the selected provider."""
|
| 62 |
+
if self.provider == "openai":
|
| 63 |
+
api_key = os.environ.get("OPENAI_API_KEY") or getattr(settings, "OPENAI_API_KEY", "")
|
| 64 |
+
if not api_key:
|
| 65 |
+
raise ValueError("OPENAI_API_KEY not found in environment or settings")
|
| 66 |
+
self.model = ChatOpenAI(
|
| 67 |
+
model="gpt-4o-mini",
|
| 68 |
+
max_tokens=4096,
|
| 69 |
+
temperature=0.2,
|
| 70 |
+
api_key=api_key
|
| 71 |
+
)
|
| 72 |
+
elif self.provider == "anthropic":
|
| 73 |
+
api_key = os.environ.get("ANTHROPIC_API_KEY") or getattr(settings, "ANTHROPIC_API_KEY", "")
|
| 74 |
+
if not api_key:
|
| 75 |
+
raise ValueError("ANTHROPIC_API_KEY not found in environment or settings")
|
| 76 |
+
self.model = ChatAnthropic(
|
| 77 |
+
model="claude-3-sonnet-20240229",
|
| 78 |
+
temperature=0.2,
|
| 79 |
+
max_tokens=4096,
|
| 80 |
+
api_key=api_key
|
| 81 |
+
)
|
| 82 |
+
elif self.provider == "groq":
|
| 83 |
+
api_key = os.environ.get("GROQ_API_KEY") or getattr(settings, "GROQ_API_KEY", "")
|
| 84 |
+
if not api_key:
|
| 85 |
+
raise ValueError("GROQ_API_KEY not found in environment or settings")
|
| 86 |
+
self.model = ChatGroq(
|
| 87 |
+
model="qwen-2.5-32b",
|
| 88 |
+
temperature=0.2,
|
| 89 |
+
api_key=api_key
|
| 90 |
+
)
|
| 91 |
+
logger.warning("Groq doesn't currently support vision models. Falling back to text-only analysis.")
|
| 92 |
+
elif self.provider == "gemini":
|
| 93 |
+
# This should not happen due to the check in __init__, but just in case
|
| 94 |
+
if not GEMINI_AVAILABLE:
|
| 95 |
+
logger.error("Gemini provider selected but langchain_google_genai is not installed!")
|
| 96 |
+
logger.info("Falling back to OpenAI provider")
|
| 97 |
+
self.provider = "openai"
|
| 98 |
+
return self._init_model()
|
| 99 |
+
|
| 100 |
+
api_key = os.environ.get("GOOGLE_API_KEY") or getattr(settings, "GOOGLE_API_KEY", "")
|
| 101 |
+
if not api_key:
|
| 102 |
+
raise ValueError("GOOGLE_API_KEY not found in environment or settings")
|
| 103 |
+
self.model = ChatGoogleGenerativeAI(
|
| 104 |
+
model="gemini-1.5-pro",
|
| 105 |
+
temperature=0.2,
|
| 106 |
+
max_tokens=4096,
|
| 107 |
+
timeout=None,
|
| 108 |
+
max_retries=2,
|
| 109 |
+
api_key=api_key,
|
| 110 |
+
#convert_system_message_to_human=True # Gemini requires converting system messages to human
|
| 111 |
+
)
|
| 112 |
+
else:
|
| 113 |
+
raise ValueError(f"Unsupported provider: {self.provider}. Use 'openai', 'anthropic', 'groq', or 'gemini'.")
|
| 114 |
+
|
| 115 |
+
def _encode_image_to_base64(self, image_path: Union[str, Path]) -> str:
|
| 116 |
+
"""
|
| 117 |
+
Encode an image to base64.
|
| 118 |
+
|
| 119 |
+
Args:
|
| 120 |
+
image_path: Path to the image file
|
| 121 |
+
|
| 122 |
+
Returns:
|
| 123 |
+
Base64 encoded image
|
| 124 |
+
"""
|
| 125 |
+
image_path = Path(image_path)
|
| 126 |
+
if not image_path.exists():
|
| 127 |
+
raise FileNotFoundError(f"Image file not found: {image_path}")
|
| 128 |
+
|
| 129 |
+
with open(image_path, "rb") as image_file:
|
| 130 |
+
return base64.b64encode(image_file.read()).decode("utf-8")
|
| 131 |
+
|
| 132 |
+
def _prepare_messages(self, image_paths: List[Union[str, Path]], job_title: Optional[str] = None) -> List[BaseMessage]:
|
| 133 |
+
"""
|
| 134 |
+
Prepare messages for the LLM with images.
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
image_paths: List of paths to the images
|
| 138 |
+
job_title: Optional job title for context
|
| 139 |
+
|
| 140 |
+
Returns:
|
| 141 |
+
List of messages for the LLM
|
| 142 |
+
"""
|
| 143 |
+
system_prompt = """You are an expert in professional appearance analysis for job interviews.
|
| 144 |
+
Analyze the candidate's picture(s) randonly selected from a video (interview/self-introduction/etc) and provide an assessment of their professional appearance,
|
| 145 |
+
focusing on:
|
| 146 |
+
|
| 147 |
+
1. Overall professional impression
|
| 148 |
+
2. Attire and dress code appropriateness
|
| 149 |
+
3. Facial expressions and perceived attitude
|
| 150 |
+
4. Background and setting appropriateness
|
| 151 |
+
5. Visual cues that might indicate personality traits relevant for professional settings
|
| 152 |
+
6. Areas of improvement for professional presentation
|
| 153 |
+
|
| 154 |
+
Ouput: it must be a valid and structured JSON object.
|
| 155 |
+
|
| 156 |
+
Provide your analysis in a structured JSON format with these keys:
|
| 157 |
+
- professionalImpression: Overall analysis of how professional the candidate appears
|
| 158 |
+
- attireAssessment: Analysis of clothing and accessories
|
| 159 |
+
- facialExpressionAnalysis: Analysis of facial expressions, perceived emotions and attitude
|
| 160 |
+
- backgroundAssessment: Analysis of the photo background and setting
|
| 161 |
+
- personalityIndicators: Potential personality traits inferred from visual cues
|
| 162 |
+
- recommendationsForImprovement: Specific recommendations for improving professional appearance
|
| 163 |
+
- overallScore: A score from 1-10 on professional appearance suitability"""
|
| 164 |
+
|
| 165 |
+
system_message = SystemMessage(content=system_prompt)
|
| 166 |
+
|
| 167 |
+
# Create the content for the human message
|
| 168 |
+
content = []
|
| 169 |
+
|
| 170 |
+
# Add text content
|
| 171 |
+
text_content = "Please analyze this candidate's profile picture"
|
| 172 |
+
if job_title:
|
| 173 |
+
text_content += f" for a {job_title} position"
|
| 174 |
+
text_content += "."
|
| 175 |
+
content.append(text_content)
|
| 176 |
+
|
| 177 |
+
# Add image content
|
| 178 |
+
for image_path in image_paths:
|
| 179 |
+
try:
|
| 180 |
+
base64_image = self._encode_image_to_base64(image_path)
|
| 181 |
+
if self.provider == "openai":
|
| 182 |
+
content.append({
|
| 183 |
+
"type": "image_url",
|
| 184 |
+
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
|
| 185 |
+
})
|
| 186 |
+
elif self.provider == "anthropic":
|
| 187 |
+
content.append({
|
| 188 |
+
"type": "image",
|
| 189 |
+
"source": {
|
| 190 |
+
"type": "base64",
|
| 191 |
+
"media_type": "image/jpeg",
|
| 192 |
+
"data": base64_image
|
| 193 |
+
}
|
| 194 |
+
})
|
| 195 |
+
elif self.provider == "gemini" and GEMINI_AVAILABLE:
|
| 196 |
+
content.append({
|
| 197 |
+
"type": "image_url",
|
| 198 |
+
"image_url": f"data:image/jpeg;base64,{base64_image}"
|
| 199 |
+
})
|
| 200 |
+
except Exception as e:
|
| 201 |
+
logger.error(f"Error encoding image {image_path}: {e}")
|
| 202 |
+
|
| 203 |
+
human_message = HumanMessage(content=content)
|
| 204 |
+
|
| 205 |
+
# For Gemini, we need to combine system message with human message
|
| 206 |
+
if self.provider == "gemini" and GEMINI_AVAILABLE:
|
| 207 |
+
return [system_message, human_message]
|
| 208 |
+
else:
|
| 209 |
+
return [system_message, human_message]
|
| 210 |
+
|
| 211 |
+
@time_it
|
| 212 |
+
def analyze_profile_pictures(
|
| 213 |
+
self,
|
| 214 |
+
image_paths: List[Union[str, Path]],
|
| 215 |
+
job_title: Optional[str] = None
|
| 216 |
+
) -> Dict[str, Any]:
|
| 217 |
+
"""
|
| 218 |
+
Analyze candidate profile pictures using the configured LLM.
|
| 219 |
+
|
| 220 |
+
Args:
|
| 221 |
+
image_paths: List of paths to the profile pictures (1-3 images)
|
| 222 |
+
job_title: Optional job title for context
|
| 223 |
+
|
| 224 |
+
Returns:
|
| 225 |
+
Dictionary with analysis results
|
| 226 |
+
"""
|
| 227 |
+
if not image_paths:
|
| 228 |
+
logger.warning("No images provided for analysis")
|
| 229 |
+
return self._generate_empty_analysis()
|
| 230 |
+
|
| 231 |
+
# Limit to max 3 images
|
| 232 |
+
if len(image_paths) > 3:
|
| 233 |
+
logger.warning(f"Too many images provided ({len(image_paths)}). Using only the first 3.")
|
| 234 |
+
image_paths = image_paths[:3]
|
| 235 |
+
|
| 236 |
+
try:
|
| 237 |
+
logger.info(f"Analyzing {len(image_paths)} profile pictures with {self.provider}")
|
| 238 |
+
|
| 239 |
+
# Prepare messages with images
|
| 240 |
+
messages = self._prepare_messages(image_paths, job_title)
|
| 241 |
+
|
| 242 |
+
# Get response from LLM
|
| 243 |
+
response = self.model.invoke(messages)
|
| 244 |
+
|
| 245 |
+
# Extract and parse the response content
|
| 246 |
+
response_content = response.content.replace("```json", "").replace("```", "")
|
| 247 |
+
|
| 248 |
+
# Try to parse JSON from the response
|
| 249 |
+
try:
|
| 250 |
+
# First, try to extract JSON if it's wrapped in markdown code blocks
|
| 251 |
+
if "```json" in response_content and "```" in response_content.split("```json", 1)[1]:
|
| 252 |
+
json_str = response_content.split("```json", 1)[1].split("```", 1)[0].strip()
|
| 253 |
+
result = json.loads(json_str)
|
| 254 |
+
elif "```" in response_content and "```" in response_content.split("```", 1)[1]:
|
| 255 |
+
json_str = response_content.split("```", 1)[1].split("```", 1)[0].strip()
|
| 256 |
+
result = json.loads(json_str)
|
| 257 |
+
else:
|
| 258 |
+
# If not in code blocks, try parsing the whole response
|
| 259 |
+
result = json.loads(response_content)
|
| 260 |
+
except json.JSONDecodeError:
|
| 261 |
+
logger.warning(f"Failed to parse JSON from response: {response_content}")
|
| 262 |
+
# Create a formatted response manually
|
| 263 |
+
result = {
|
| 264 |
+
"professionalImpression": "Could not parse structured analysis",
|
| 265 |
+
"rawResponse": response_content
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
# Add metadata
|
| 269 |
+
result["provider"] = self.provider
|
| 270 |
+
result["imageCount"] = len(image_paths)
|
| 271 |
+
|
| 272 |
+
return result
|
| 273 |
+
|
| 274 |
+
except Exception as e:
|
| 275 |
+
logger.error(f"Error analyzing profile pictures: {e}")
|
| 276 |
+
return self._generate_empty_analysis()
|
| 277 |
+
|
| 278 |
+
def _generate_empty_analysis(self) -> Dict[str, Any]:
|
| 279 |
+
"""
|
| 280 |
+
Generate an empty analysis result when analysis fails.
|
| 281 |
+
|
| 282 |
+
Returns:
|
| 283 |
+
Empty analysis dictionary
|
| 284 |
+
"""
|
| 285 |
+
return {
|
| 286 |
+
"professionalImpression": "No analysis available",
|
| 287 |
+
"attireAssessment": "No analysis available",
|
| 288 |
+
"facialExpressionAnalysis": "No analysis available",
|
| 289 |
+
"backgroundAssessment": "No analysis available",
|
| 290 |
+
"personalityIndicators": "No analysis available",
|
| 291 |
+
"recommendationsForImprovement": "No analysis available",
|
| 292 |
+
"overallScore": 0,
|
| 293 |
+
"error": "Failed to analyze profile pictures",
|
| 294 |
+
"provider": self.provider,
|
| 295 |
+
"imageCount": 0
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
|
behavior_backend/app/services/processing/body_language_analyzer.py
ADDED
|
@@ -0,0 +1,1100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import mediapipe as mp
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import time
|
| 6 |
+
from collections import deque
|
| 7 |
+
import math
|
| 8 |
+
import json
|
| 9 |
+
import os
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from app.services.processing.eye_contact_analyzer import EyeContactAnalyzer
|
| 12 |
+
from app.services.processing.eye_contact_analyzer import analyze_eye_contact
|
| 13 |
+
from app.utils.device_utils import get_available_device
|
| 14 |
+
|
| 15 |
+
# Initialize device once at module level
|
| 16 |
+
DEVICE = get_available_device()
|
| 17 |
+
|
| 18 |
+
class BodyLanguageAnalyzer:
|
| 19 |
+
def __init__(self, history_size=100):
|
| 20 |
+
"""
|
| 21 |
+
Initialize the body language analyzer for interview assessment.
|
| 22 |
+
|
| 23 |
+
Args:
|
| 24 |
+
history_size: Number of frames to keep in history for rolling metrics
|
| 25 |
+
"""
|
| 26 |
+
# Initialize MediaPipe Pose and Holistic
|
| 27 |
+
self.mp_holistic = mp.solutions.holistic
|
| 28 |
+
self.mp_drawing = mp.solutions.drawing_utils
|
| 29 |
+
self.mp_drawing_styles = mp.solutions.drawing_styles
|
| 30 |
+
|
| 31 |
+
self.holistic = self.mp_holistic.Holistic(
|
| 32 |
+
min_detection_confidence=0.5,
|
| 33 |
+
min_tracking_confidence=0.5,
|
| 34 |
+
static_image_mode=False
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Stats tracking
|
| 38 |
+
self.history_size = history_size
|
| 39 |
+
self.total_frames = 0
|
| 40 |
+
self.start_time = time.time()
|
| 41 |
+
|
| 42 |
+
# Posture tracking
|
| 43 |
+
self.shoulder_alignment_history = deque(maxlen=history_size)
|
| 44 |
+
self.lean_forward_history = deque(maxlen=history_size)
|
| 45 |
+
self.head_tilt_history = deque(maxlen=history_size)
|
| 46 |
+
|
| 47 |
+
# Gesture tracking
|
| 48 |
+
self.hand_movement_history = deque(maxlen=history_size)
|
| 49 |
+
self.self_touch_history = deque(maxlen=history_size)
|
| 50 |
+
self.crossing_arms_history = deque(maxlen=history_size)
|
| 51 |
+
|
| 52 |
+
# Movement tracking
|
| 53 |
+
self.fidgeting_history = deque(maxlen=history_size)
|
| 54 |
+
self.pose_shift_history = deque(maxlen=history_size)
|
| 55 |
+
|
| 56 |
+
# Previous frame landmarks for movement detection
|
| 57 |
+
self.prev_pose_landmarks = None
|
| 58 |
+
self.prev_face_landmarks = None
|
| 59 |
+
self.prev_left_hand_landmarks = None
|
| 60 |
+
self.prev_right_hand_landmarks = None
|
| 61 |
+
|
| 62 |
+
# Threshold values
|
| 63 |
+
self.thresholds = {
|
| 64 |
+
'shoulder_alignment': 0.05, # Shoulder height difference ratio
|
| 65 |
+
'lean_forward': 0.4, # Forward lean threshold
|
| 66 |
+
'head_tilt': 0.1, # Head tilt angle threshold
|
| 67 |
+
'hand_movement': 0.03, # Hand movement threshold
|
| 68 |
+
'self_touch': 0.1, # Self-touch proximity threshold
|
| 69 |
+
'crossing_arms': 0.15, # Arms crossing threshold
|
| 70 |
+
'fidgeting': 0.02, # Fidgeting movement threshold
|
| 71 |
+
'pose_shift': 0.05 # Major posture shift threshold
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
# Current state
|
| 75 |
+
self.current_state = {
|
| 76 |
+
'shoulder_misalignment': 0,
|
| 77 |
+
'leaning_forward': 0,
|
| 78 |
+
'head_tilted': 0,
|
| 79 |
+
'hand_movement': 0,
|
| 80 |
+
'self_touching': 0,
|
| 81 |
+
'arms_crossed': 0,
|
| 82 |
+
'fidgeting': 0,
|
| 83 |
+
'pose_shifting': 0,
|
| 84 |
+
'last_pose_shift': 0
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
def reset_stats(self):
|
| 88 |
+
"""Reset all statistics for a new session."""
|
| 89 |
+
self.shoulder_alignment_history.clear()
|
| 90 |
+
self.lean_forward_history.clear()
|
| 91 |
+
self.head_tilt_history.clear()
|
| 92 |
+
self.hand_movement_history.clear()
|
| 93 |
+
self.self_touch_history.clear()
|
| 94 |
+
self.crossing_arms_history.clear()
|
| 95 |
+
self.fidgeting_history.clear()
|
| 96 |
+
self.pose_shift_history.clear()
|
| 97 |
+
|
| 98 |
+
self.total_frames = 0
|
| 99 |
+
self.start_time = time.time()
|
| 100 |
+
self.prev_pose_landmarks = None
|
| 101 |
+
self.prev_face_landmarks = None
|
| 102 |
+
self.prev_left_hand_landmarks = None
|
| 103 |
+
self.prev_right_hand_landmarks = None
|
| 104 |
+
|
| 105 |
+
def _calculate_distance(self, point1, point2):
|
| 106 |
+
"""Calculate Euclidean distance between two 3D points."""
|
| 107 |
+
return math.sqrt((point1.x - point2.x)**2 +
|
| 108 |
+
(point1.y - point2.y)**2 +
|
| 109 |
+
(point1.z - point2.z)**2)
|
| 110 |
+
|
| 111 |
+
def _calculate_angle(self, point1, point2, point3):
|
| 112 |
+
"""Calculate angle between three points."""
|
| 113 |
+
vector1 = np.array([point1.x - point2.x, point1.y - point2.y, point1.z - point2.z])
|
| 114 |
+
vector2 = np.array([point3.x - point2.x, point3.y - point2.y, point3.z - point2.z])
|
| 115 |
+
|
| 116 |
+
# Normalize vectors
|
| 117 |
+
norm1 = np.linalg.norm(vector1)
|
| 118 |
+
norm2 = np.linalg.norm(vector2)
|
| 119 |
+
|
| 120 |
+
if norm1 > 0 and norm2 > 0:
|
| 121 |
+
vector1 = vector1 / norm1
|
| 122 |
+
vector2 = vector2 / norm2
|
| 123 |
+
|
| 124 |
+
# Calculate dot product and angle
|
| 125 |
+
dot_product = np.clip(np.dot(vector1, vector2), -1.0, 1.0)
|
| 126 |
+
angle = np.arccos(dot_product)
|
| 127 |
+
return np.degrees(angle)
|
| 128 |
+
|
| 129 |
+
return 0
|
| 130 |
+
|
| 131 |
+
def _calculate_landmark_movement(self, current_landmark, previous_landmark):
|
| 132 |
+
"""Calculate movement between current and previous landmark position."""
|
| 133 |
+
if current_landmark is None or previous_landmark is None:
|
| 134 |
+
return 0
|
| 135 |
+
|
| 136 |
+
return self._calculate_distance(current_landmark, previous_landmark)
|
| 137 |
+
|
| 138 |
+
def _analyze_shoulder_alignment(self, pose_landmarks):
|
| 139 |
+
"""Analyze shoulder alignment (level shoulders vs. one higher than the other)."""
|
| 140 |
+
if pose_landmarks:
|
| 141 |
+
left_shoulder = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.LEFT_SHOULDER]
|
| 142 |
+
right_shoulder = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.RIGHT_SHOULDER]
|
| 143 |
+
|
| 144 |
+
# Calculate shoulder height difference (y-axis)
|
| 145 |
+
height_diff = abs(left_shoulder.y - right_shoulder.y)
|
| 146 |
+
|
| 147 |
+
# Normalize by shoulder width
|
| 148 |
+
shoulder_width = abs(left_shoulder.x - right_shoulder.x)
|
| 149 |
+
if shoulder_width > 0:
|
| 150 |
+
normalized_diff = height_diff / shoulder_width
|
| 151 |
+
self.shoulder_alignment_history.append(normalized_diff)
|
| 152 |
+
|
| 153 |
+
# Update current state
|
| 154 |
+
self.current_state['shoulder_misalignment'] = (
|
| 155 |
+
normalized_diff > self.thresholds['shoulder_alignment'])
|
| 156 |
+
|
| 157 |
+
return normalized_diff
|
| 158 |
+
|
| 159 |
+
return 0
|
| 160 |
+
|
| 161 |
+
def _analyze_lean_forward(self, pose_landmarks):
|
| 162 |
+
"""Analyze if the person is leaning forward."""
|
| 163 |
+
if pose_landmarks:
|
| 164 |
+
# Use shoulder and hip positions to determine lean
|
| 165 |
+
left_shoulder = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.LEFT_SHOULDER]
|
| 166 |
+
right_shoulder = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.RIGHT_SHOULDER]
|
| 167 |
+
left_hip = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.LEFT_HIP]
|
| 168 |
+
right_hip = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.RIGHT_HIP]
|
| 169 |
+
|
| 170 |
+
# Calculate average shoulder and hip positions
|
| 171 |
+
shoulder_z = (left_shoulder.z + right_shoulder.z) / 2
|
| 172 |
+
hip_z = (left_hip.z + right_hip.z) / 2
|
| 173 |
+
|
| 174 |
+
# Calculate lean (z-axis difference, normalized by height)
|
| 175 |
+
shoulder_hip_y_diff = abs((left_shoulder.y + right_shoulder.y)/2 -
|
| 176 |
+
(left_hip.y + right_hip.y)/2)
|
| 177 |
+
|
| 178 |
+
lean_forward = (shoulder_z - hip_z) / max(shoulder_hip_y_diff, 0.1)
|
| 179 |
+
|
| 180 |
+
# Track history
|
| 181 |
+
self.lean_forward_history.append(lean_forward)
|
| 182 |
+
|
| 183 |
+
# Update current state
|
| 184 |
+
self.current_state['leaning_forward'] = (
|
| 185 |
+
lean_forward > self.thresholds['lean_forward'])
|
| 186 |
+
|
| 187 |
+
return lean_forward
|
| 188 |
+
|
| 189 |
+
return 0
|
| 190 |
+
|
| 191 |
+
def _analyze_head_tilt(self, face_landmarks):
|
| 192 |
+
"""Analyze head tilt (left/right)."""
|
| 193 |
+
if face_landmarks:
|
| 194 |
+
# Use eye and ear positions to determine head tilt
|
| 195 |
+
left_eye = face_landmarks.landmark[33] # Left eye outer corner
|
| 196 |
+
right_eye = face_landmarks.landmark[263] # Right eye outer corner
|
| 197 |
+
|
| 198 |
+
# Calculate tilt angle from horizontal
|
| 199 |
+
angle = math.atan2(right_eye.y - left_eye.y, right_eye.x - left_eye.x)
|
| 200 |
+
tilt = abs(angle)
|
| 201 |
+
|
| 202 |
+
# Track history
|
| 203 |
+
self.head_tilt_history.append(tilt)
|
| 204 |
+
|
| 205 |
+
# Update current state
|
| 206 |
+
self.current_state['head_tilted'] = (
|
| 207 |
+
tilt > self.thresholds['head_tilt'])
|
| 208 |
+
|
| 209 |
+
return tilt
|
| 210 |
+
|
| 211 |
+
return 0
|
| 212 |
+
|
| 213 |
+
def _analyze_hand_movement(self, left_hand, right_hand):
|
| 214 |
+
"""Analyze hand movement and gestures."""
|
| 215 |
+
movement = 0
|
| 216 |
+
|
| 217 |
+
# Check left hand movement
|
| 218 |
+
if left_hand and self.prev_left_hand_landmarks:
|
| 219 |
+
# Use wrist as reference point
|
| 220 |
+
left_movement = self._calculate_landmark_movement(
|
| 221 |
+
left_hand.landmark[0], # Wrist landmark
|
| 222 |
+
self.prev_left_hand_landmarks.landmark[0]
|
| 223 |
+
)
|
| 224 |
+
movement = max(movement, left_movement)
|
| 225 |
+
|
| 226 |
+
# Check right hand movement
|
| 227 |
+
if right_hand and self.prev_right_hand_landmarks:
|
| 228 |
+
# Use wrist as reference point
|
| 229 |
+
right_movement = self._calculate_landmark_movement(
|
| 230 |
+
right_hand.landmark[0], # Wrist landmark
|
| 231 |
+
self.prev_right_hand_landmarks.landmark[0]
|
| 232 |
+
)
|
| 233 |
+
movement = max(movement, right_movement)
|
| 234 |
+
|
| 235 |
+
# Track history
|
| 236 |
+
self.hand_movement_history.append(movement)
|
| 237 |
+
|
| 238 |
+
# Update current state
|
| 239 |
+
self.current_state['hand_movement'] = (
|
| 240 |
+
movement > self.thresholds['hand_movement'])
|
| 241 |
+
|
| 242 |
+
return movement
|
| 243 |
+
|
| 244 |
+
def _analyze_self_touch(self, pose_landmarks, left_hand, right_hand, face_landmarks):
|
| 245 |
+
"""Detect if hands are touching face, hair, or other body parts."""
|
| 246 |
+
self_touch = 0
|
| 247 |
+
|
| 248 |
+
if face_landmarks:
|
| 249 |
+
# Check left hand to face proximity
|
| 250 |
+
if left_hand:
|
| 251 |
+
left_index_tip = left_hand.landmark[8] # Index finger tip
|
| 252 |
+
nose_tip = face_landmarks.landmark[4]
|
| 253 |
+
|
| 254 |
+
left_to_face_dist = self._calculate_distance(left_index_tip, nose_tip)
|
| 255 |
+
self_touch = max(self_touch, 1.0 - min(left_to_face_dist * 5, 1.0))
|
| 256 |
+
|
| 257 |
+
# Check right hand to face proximity
|
| 258 |
+
if right_hand:
|
| 259 |
+
right_index_tip = right_hand.landmark[8] # Index finger tip
|
| 260 |
+
nose_tip = face_landmarks.landmark[4]
|
| 261 |
+
|
| 262 |
+
right_to_face_dist = self._calculate_distance(right_index_tip, nose_tip)
|
| 263 |
+
self_touch = max(self_touch, 1.0 - min(right_to_face_dist * 5, 1.0))
|
| 264 |
+
|
| 265 |
+
# Track history
|
| 266 |
+
self.self_touch_history.append(self_touch)
|
| 267 |
+
|
| 268 |
+
# Update current state
|
| 269 |
+
self.current_state['self_touching'] = (
|
| 270 |
+
self_touch > self.thresholds['self_touch'])
|
| 271 |
+
|
| 272 |
+
return self_touch
|
| 273 |
+
|
| 274 |
+
def _analyze_crossing_arms(self, pose_landmarks):
|
| 275 |
+
"""Detect if arms are crossed."""
|
| 276 |
+
crossing_score = 0
|
| 277 |
+
|
| 278 |
+
if pose_landmarks:
|
| 279 |
+
# Get key landmarks
|
| 280 |
+
left_shoulder = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.LEFT_SHOULDER]
|
| 281 |
+
right_shoulder = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.RIGHT_SHOULDER]
|
| 282 |
+
left_elbow = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.LEFT_ELBOW]
|
| 283 |
+
right_elbow = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.RIGHT_ELBOW]
|
| 284 |
+
left_wrist = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.LEFT_WRIST]
|
| 285 |
+
right_wrist = pose_landmarks.landmark[self.mp_holistic.PoseLandmark.RIGHT_WRIST]
|
| 286 |
+
|
| 287 |
+
# Check if wrists are crossing the center line
|
| 288 |
+
center_x = (left_shoulder.x + right_shoulder.x) / 2
|
| 289 |
+
|
| 290 |
+
left_wrist_right_of_center = left_wrist.x > center_x
|
| 291 |
+
right_wrist_left_of_center = right_wrist.x < center_x
|
| 292 |
+
|
| 293 |
+
elbows_down = (left_elbow.y > left_shoulder.y and
|
| 294 |
+
right_elbow.y > right_shoulder.y)
|
| 295 |
+
|
| 296 |
+
# Simple heuristic for crossed arms
|
| 297 |
+
if left_wrist_right_of_center and right_wrist_left_of_center and elbows_down:
|
| 298 |
+
# Calculate how far the wrists have crossed
|
| 299 |
+
left_cross_amount = (left_wrist.x - center_x) / (right_shoulder.x - center_x)
|
| 300 |
+
right_cross_amount = (center_x - right_wrist.x) / (center_x - left_shoulder.x)
|
| 301 |
+
|
| 302 |
+
crossing_score = min(1.0, (left_cross_amount + right_cross_amount) / 2)
|
| 303 |
+
|
| 304 |
+
# Track history
|
| 305 |
+
self.crossing_arms_history.append(crossing_score)
|
| 306 |
+
|
| 307 |
+
# Update current state
|
| 308 |
+
self.current_state['arms_crossed'] = (
|
| 309 |
+
crossing_score > self.thresholds['crossing_arms'])
|
| 310 |
+
|
| 311 |
+
return crossing_score
|
| 312 |
+
|
| 313 |
+
def _analyze_fidgeting(self, pose_landmarks, left_hand, right_hand):
|
| 314 |
+
"""Detect small repetitive movements (fidgeting)."""
|
| 315 |
+
fidgeting_score = 0
|
| 316 |
+
|
| 317 |
+
# Check for small hand movements
|
| 318 |
+
if self.prev_left_hand_landmarks and left_hand:
|
| 319 |
+
# Calculate average movement of all finger joints
|
| 320 |
+
total_movement = 0
|
| 321 |
+
count = 0
|
| 322 |
+
|
| 323 |
+
for i in range(21): # 21 hand landmarks
|
| 324 |
+
if i < len(left_hand.landmark) and i < len(self.prev_left_hand_landmarks.landmark):
|
| 325 |
+
movement = self._calculate_landmark_movement(
|
| 326 |
+
left_hand.landmark[i],
|
| 327 |
+
self.prev_left_hand_landmarks.landmark[i]
|
| 328 |
+
)
|
| 329 |
+
total_movement += movement
|
| 330 |
+
count += 1
|
| 331 |
+
|
| 332 |
+
if count > 0:
|
| 333 |
+
avg_movement = total_movement / count
|
| 334 |
+
fidgeting_score = max(fidgeting_score, avg_movement)
|
| 335 |
+
|
| 336 |
+
# Similar for right hand
|
| 337 |
+
if self.prev_right_hand_landmarks and right_hand:
|
| 338 |
+
total_movement = 0
|
| 339 |
+
count = 0
|
| 340 |
+
|
| 341 |
+
for i in range(21): # 21 hand landmarks
|
| 342 |
+
if i < len(right_hand.landmark) and i < len(self.prev_right_hand_landmarks.landmark):
|
| 343 |
+
movement = self._calculate_landmark_movement(
|
| 344 |
+
right_hand.landmark[i],
|
| 345 |
+
self.prev_right_hand_landmarks.landmark[i]
|
| 346 |
+
)
|
| 347 |
+
total_movement += movement
|
| 348 |
+
count += 1
|
| 349 |
+
|
| 350 |
+
if count > 0:
|
| 351 |
+
avg_movement = total_movement / count
|
| 352 |
+
fidgeting_score = max(fidgeting_score, avg_movement)
|
| 353 |
+
|
| 354 |
+
# Track history
|
| 355 |
+
self.fidgeting_history.append(fidgeting_score)
|
| 356 |
+
|
| 357 |
+
# Update current state - fidgeting is when movement is small but persistent
|
| 358 |
+
self.current_state['fidgeting'] = (
|
| 359 |
+
fidgeting_score > self.thresholds['fidgeting'] and
|
| 360 |
+
fidgeting_score < self.thresholds['hand_movement'])
|
| 361 |
+
|
| 362 |
+
return fidgeting_score
|
| 363 |
+
|
| 364 |
+
def _analyze_pose_shift(self, pose_landmarks):
|
| 365 |
+
"""Detect major posture shifts."""
|
| 366 |
+
pose_shift = 0
|
| 367 |
+
|
| 368 |
+
if pose_landmarks and self.prev_pose_landmarks:
|
| 369 |
+
# Calculate average movement of all upper body landmarks
|
| 370 |
+
upper_body_landmarks = [
|
| 371 |
+
self.mp_holistic.PoseLandmark.LEFT_SHOULDER,
|
| 372 |
+
self.mp_holistic.PoseLandmark.RIGHT_SHOULDER,
|
| 373 |
+
self.mp_holistic.PoseLandmark.LEFT_ELBOW,
|
| 374 |
+
self.mp_holistic.PoseLandmark.RIGHT_ELBOW,
|
| 375 |
+
self.mp_holistic.PoseLandmark.LEFT_WRIST,
|
| 376 |
+
self.mp_holistic.PoseLandmark.RIGHT_WRIST,
|
| 377 |
+
self.mp_holistic.PoseLandmark.LEFT_HIP,
|
| 378 |
+
self.mp_holistic.PoseLandmark.RIGHT_HIP
|
| 379 |
+
]
|
| 380 |
+
|
| 381 |
+
total_movement = 0
|
| 382 |
+
for landmark_idx in upper_body_landmarks:
|
| 383 |
+
movement = self._calculate_landmark_movement(
|
| 384 |
+
pose_landmarks.landmark[landmark_idx],
|
| 385 |
+
self.prev_pose_landmarks.landmark[landmark_idx]
|
| 386 |
+
)
|
| 387 |
+
total_movement += movement
|
| 388 |
+
|
| 389 |
+
pose_shift = total_movement / len(upper_body_landmarks)
|
| 390 |
+
|
| 391 |
+
# Track history
|
| 392 |
+
self.pose_shift_history.append(pose_shift)
|
| 393 |
+
|
| 394 |
+
# Update current state
|
| 395 |
+
current_time = time.time()
|
| 396 |
+
if pose_shift > self.thresholds['pose_shift']:
|
| 397 |
+
self.current_state['pose_shifting'] = 1
|
| 398 |
+
self.current_state['last_pose_shift'] = current_time
|
| 399 |
+
elif current_time - self.current_state['last_pose_shift'] > 3: # Reset after 3 seconds
|
| 400 |
+
self.current_state['pose_shifting'] = 0
|
| 401 |
+
|
| 402 |
+
return pose_shift
|
| 403 |
+
|
| 404 |
+
def process_frame(self, frame, annotate=False):
|
| 405 |
+
"""
|
| 406 |
+
Process a single frame to analyze body language.
|
| 407 |
+
|
| 408 |
+
Args:
|
| 409 |
+
frame: The video frame (BGR format)
|
| 410 |
+
annotate: Whether to draw annotations on the frame
|
| 411 |
+
|
| 412 |
+
Returns:
|
| 413 |
+
dict: Body language metrics for this frame
|
| 414 |
+
frame: Annotated frame if annotate=True, otherwise original frame
|
| 415 |
+
"""
|
| 416 |
+
self.total_frames += 1
|
| 417 |
+
frame_metrics = {
|
| 418 |
+
'timestamp': time.time(),
|
| 419 |
+
'frame_number': self.total_frames
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
# Convert to RGB for MediaPipe
|
| 423 |
+
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 424 |
+
|
| 425 |
+
# Process the frame
|
| 426 |
+
results = self.holistic.process(frame_rgb)
|
| 427 |
+
|
| 428 |
+
# Make a copy for annotations if needed
|
| 429 |
+
if annotate:
|
| 430 |
+
annotated_frame = frame.copy()
|
| 431 |
+
else:
|
| 432 |
+
annotated_frame = frame
|
| 433 |
+
|
| 434 |
+
# Analyze different aspects of body language
|
| 435 |
+
if results.pose_landmarks:
|
| 436 |
+
# Posture analysis
|
| 437 |
+
shoulder_alignment = self._analyze_shoulder_alignment(results.pose_landmarks)
|
| 438 |
+
lean_forward = self._analyze_lean_forward(results.pose_landmarks)
|
| 439 |
+
|
| 440 |
+
frame_metrics['shoulder_alignment'] = shoulder_alignment
|
| 441 |
+
frame_metrics['lean_forward'] = lean_forward
|
| 442 |
+
|
| 443 |
+
# Arms crossed analysis
|
| 444 |
+
crossing_arms = self._analyze_crossing_arms(results.pose_landmarks)
|
| 445 |
+
frame_metrics['crossing_arms'] = crossing_arms
|
| 446 |
+
|
| 447 |
+
# Pose shift analysis
|
| 448 |
+
pose_shift = self._analyze_pose_shift(results.pose_landmarks)
|
| 449 |
+
frame_metrics['pose_shift'] = pose_shift
|
| 450 |
+
|
| 451 |
+
if results.face_landmarks:
|
| 452 |
+
# Head tilt analysis
|
| 453 |
+
head_tilt = self._analyze_head_tilt(results.face_landmarks)
|
| 454 |
+
frame_metrics['head_tilt'] = head_tilt
|
| 455 |
+
|
| 456 |
+
# Hand movement and gestures
|
| 457 |
+
hand_movement = self._analyze_hand_movement(
|
| 458 |
+
results.left_hand_landmarks,
|
| 459 |
+
results.right_hand_landmarks
|
| 460 |
+
)
|
| 461 |
+
frame_metrics['hand_movement'] = hand_movement
|
| 462 |
+
|
| 463 |
+
# Self-touch detection
|
| 464 |
+
self_touch = self._analyze_self_touch(
|
| 465 |
+
results.pose_landmarks,
|
| 466 |
+
results.left_hand_landmarks,
|
| 467 |
+
results.right_hand_landmarks,
|
| 468 |
+
results.face_landmarks
|
| 469 |
+
)
|
| 470 |
+
frame_metrics['self_touch'] = self_touch
|
| 471 |
+
|
| 472 |
+
# Fidgeting detection
|
| 473 |
+
fidgeting = self._analyze_fidgeting(
|
| 474 |
+
results.pose_landmarks,
|
| 475 |
+
results.left_hand_landmarks,
|
| 476 |
+
results.right_hand_landmarks
|
| 477 |
+
)
|
| 478 |
+
frame_metrics['fidgeting'] = fidgeting
|
| 479 |
+
|
| 480 |
+
# Store current landmarks for next frame comparison
|
| 481 |
+
self.prev_pose_landmarks = results.pose_landmarks
|
| 482 |
+
self.prev_face_landmarks = results.face_landmarks
|
| 483 |
+
self.prev_left_hand_landmarks = results.left_hand_landmarks
|
| 484 |
+
self.prev_right_hand_landmarks = results.right_hand_landmarks
|
| 485 |
+
|
| 486 |
+
# Add current state to metrics
|
| 487 |
+
for key, value in self.current_state.items():
|
| 488 |
+
if key != 'last_pose_shift': # Skip timestamp
|
| 489 |
+
frame_metrics[key] = value
|
| 490 |
+
|
| 491 |
+
# Draw annotations if requested
|
| 492 |
+
if annotate:
|
| 493 |
+
# Draw pose landmarks
|
| 494 |
+
if results.pose_landmarks:
|
| 495 |
+
self.mp_drawing.draw_landmarks(
|
| 496 |
+
annotated_frame,
|
| 497 |
+
results.pose_landmarks,
|
| 498 |
+
self.mp_holistic.POSE_CONNECTIONS,
|
| 499 |
+
landmark_drawing_spec=self.mp_drawing_styles.get_default_pose_landmarks_style()
|
| 500 |
+
)
|
| 501 |
+
|
| 502 |
+
# Draw face landmarks
|
| 503 |
+
if results.face_landmarks:
|
| 504 |
+
self.mp_drawing.draw_landmarks(
|
| 505 |
+
annotated_frame,
|
| 506 |
+
results.face_landmarks,
|
| 507 |
+
self.mp_holistic.FACEMESH_TESSELATION,
|
| 508 |
+
landmark_drawing_spec=None,
|
| 509 |
+
connection_drawing_spec=self.mp_drawing_styles.get_default_face_mesh_tesselation_style()
|
| 510 |
+
)
|
| 511 |
+
|
| 512 |
+
# Draw hand landmarks
|
| 513 |
+
if results.left_hand_landmarks:
|
| 514 |
+
self.mp_drawing.draw_landmarks(
|
| 515 |
+
annotated_frame,
|
| 516 |
+
results.left_hand_landmarks,
|
| 517 |
+
self.mp_holistic.HAND_CONNECTIONS,
|
| 518 |
+
landmark_drawing_spec=self.mp_drawing_styles.get_default_hand_landmarks_style(),
|
| 519 |
+
connection_drawing_spec=self.mp_drawing_styles.get_default_hand_connections_style()
|
| 520 |
+
)
|
| 521 |
+
if results.right_hand_landmarks:
|
| 522 |
+
self.mp_drawing.draw_landmarks(
|
| 523 |
+
annotated_frame,
|
| 524 |
+
results.right_hand_landmarks,
|
| 525 |
+
self.mp_holistic.HAND_CONNECTIONS,
|
| 526 |
+
landmark_drawing_spec=self.mp_drawing_styles.get_default_hand_landmarks_style(),
|
| 527 |
+
connection_drawing_spec=self.mp_drawing_styles.get_default_hand_connections_style()
|
| 528 |
+
)
|
| 529 |
+
|
| 530 |
+
# Draw body language status on the frame
|
| 531 |
+
y_pos = 30
|
| 532 |
+
font_scale = 0.6
|
| 533 |
+
|
| 534 |
+
# Draw posture status
|
| 535 |
+
if self.current_state['shoulder_misalignment']:
|
| 536 |
+
cv2.putText(annotated_frame, "Uneven Shoulders", (20, y_pos),
|
| 537 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), 2)
|
| 538 |
+
y_pos += 25
|
| 539 |
+
|
| 540 |
+
if self.current_state['leaning_forward']:
|
| 541 |
+
cv2.putText(annotated_frame, "Leaning Forward", (20, y_pos),
|
| 542 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 255, 0), 2)
|
| 543 |
+
y_pos += 25
|
| 544 |
+
|
| 545 |
+
if self.current_state['head_tilted']:
|
| 546 |
+
cv2.putText(annotated_frame, "Head Tilted", (20, y_pos),
|
| 547 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), 2)
|
| 548 |
+
y_pos += 25
|
| 549 |
+
|
| 550 |
+
# Draw gesture status
|
| 551 |
+
if self.current_state['hand_movement']:
|
| 552 |
+
cv2.putText(annotated_frame, "Gesturing", (20, y_pos),
|
| 553 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 255, 0), 2)
|
| 554 |
+
y_pos += 25
|
| 555 |
+
|
| 556 |
+
if self.current_state['self_touching']:
|
| 557 |
+
cv2.putText(annotated_frame, "Self-Touching", (20, y_pos),
|
| 558 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), 2)
|
| 559 |
+
y_pos += 25
|
| 560 |
+
|
| 561 |
+
if self.current_state['arms_crossed']:
|
| 562 |
+
cv2.putText(annotated_frame, "Arms Crossed", (20, y_pos),
|
| 563 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), 2)
|
| 564 |
+
y_pos += 25
|
| 565 |
+
|
| 566 |
+
# Draw movement status
|
| 567 |
+
if self.current_state['fidgeting']:
|
| 568 |
+
cv2.putText(annotated_frame, "Fidgeting", (20, y_pos),
|
| 569 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), 2)
|
| 570 |
+
y_pos += 25
|
| 571 |
+
|
| 572 |
+
if self.current_state['pose_shifting']:
|
| 573 |
+
cv2.putText(annotated_frame, "Shifting Posture", (20, y_pos),
|
| 574 |
+
cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), 2)
|
| 575 |
+
y_pos += 25
|
| 576 |
+
|
| 577 |
+
return frame_metrics, annotated_frame
|
| 578 |
+
|
| 579 |
+
def get_stats(self):
|
| 580 |
+
"""
|
| 581 |
+
Get comprehensive body language statistics.
|
| 582 |
+
|
| 583 |
+
Returns:
|
| 584 |
+
dict: Statistics about body language
|
| 585 |
+
"""
|
| 586 |
+
current_time = time.time()
|
| 587 |
+
total_duration = current_time - self.start_time
|
| 588 |
+
|
| 589 |
+
# Calculate stats for different metrics
|
| 590 |
+
stats = {
|
| 591 |
+
'total_frames': self.total_frames,
|
| 592 |
+
'total_duration_seconds': total_duration,
|
| 593 |
+
|
| 594 |
+
# Posture stats
|
| 595 |
+
'shoulder_misalignment_percentage': self._calculate_percentage(
|
| 596 |
+
[1 if x > self.thresholds['shoulder_alignment'] else 0
|
| 597 |
+
for x in self.shoulder_alignment_history]),
|
| 598 |
+
'leaning_forward_percentage': self._calculate_percentage(
|
| 599 |
+
[1 if x > self.thresholds['lean_forward'] else 0
|
| 600 |
+
for x in self.lean_forward_history]),
|
| 601 |
+
'head_tilt_percentage': self._calculate_percentage(
|
| 602 |
+
[1 if x > self.thresholds['head_tilt'] else 0
|
| 603 |
+
for x in self.head_tilt_history]),
|
| 604 |
+
|
| 605 |
+
# Gesture stats
|
| 606 |
+
'hand_movement_percentage': self._calculate_percentage(
|
| 607 |
+
[1 if x > self.thresholds['hand_movement'] else 0
|
| 608 |
+
for x in self.hand_movement_history]),
|
| 609 |
+
'self_touch_percentage': self._calculate_percentage(
|
| 610 |
+
[1 if x > self.thresholds['self_touch'] else 0
|
| 611 |
+
for x in self.self_touch_history]),
|
| 612 |
+
'arms_crossed_percentage': self._calculate_percentage(
|
| 613 |
+
[1 if x > self.thresholds['crossing_arms'] else 0
|
| 614 |
+
for x in self.crossing_arms_history]),
|
| 615 |
+
|
| 616 |
+
# Movement stats
|
| 617 |
+
'fidgeting_percentage': self._calculate_percentage(
|
| 618 |
+
[1 if (x > self.thresholds['fidgeting'] and x < self.thresholds['hand_movement']) else 0
|
| 619 |
+
for x in self.fidgeting_history]),
|
| 620 |
+
'pose_shifts_count': sum([1 if x > self.thresholds['pose_shift'] else 0
|
| 621 |
+
for x in self.pose_shift_history]),
|
| 622 |
+
|
| 623 |
+
# Average intensity (when present)
|
| 624 |
+
'avg_shoulder_misalignment': self._calculate_average(
|
| 625 |
+
[x for x in self.shoulder_alignment_history if x > self.thresholds['shoulder_alignment']]),
|
| 626 |
+
'avg_lean_forward': self._calculate_average(
|
| 627 |
+
[x for x in self.lean_forward_history if x > self.thresholds['lean_forward']]),
|
| 628 |
+
'avg_head_tilt': self._calculate_average(
|
| 629 |
+
[x for x in self.head_tilt_history if x > self.thresholds['head_tilt']]),
|
| 630 |
+
'avg_hand_movement': self._calculate_average(
|
| 631 |
+
[x for x in self.hand_movement_history if x > self.thresholds['hand_movement']]),
|
| 632 |
+
'avg_self_touch': self._calculate_average(
|
| 633 |
+
[x for x in self.self_touch_history if x > self.thresholds['self_touch']]),
|
| 634 |
+
'avg_arms_crossed': self._calculate_average(
|
| 635 |
+
[x for x in self.crossing_arms_history if x > self.thresholds['crossing_arms']]),
|
| 636 |
+
'avg_fidgeting': self._calculate_average(
|
| 637 |
+
[x for x in self.fidgeting_history if x > self.thresholds['fidgeting']
|
| 638 |
+
and x < self.thresholds['hand_movement']])
|
| 639 |
+
}
|
| 640 |
+
|
| 641 |
+
# Calculate pose shifts per minute
|
| 642 |
+
if total_duration > 0:
|
| 643 |
+
stats['pose_shifts_per_minute'] = stats['pose_shifts_count'] / (total_duration / 60)
|
| 644 |
+
else:
|
| 645 |
+
stats['pose_shifts_per_minute'] = 0
|
| 646 |
+
|
| 647 |
+
return stats
|
| 648 |
+
|
| 649 |
+
def _calculate_percentage(self, binary_list):
|
| 650 |
+
"""Calculate percentage of True/1 values in a list."""
|
| 651 |
+
if len(binary_list) == 0:
|
| 652 |
+
return 0
|
| 653 |
+
return sum(binary_list) / len(binary_list) * 100
|
| 654 |
+
|
| 655 |
+
def _calculate_average(self, values_list):
|
| 656 |
+
"""Calculate average of values in a list."""
|
| 657 |
+
if len(values_list) == 0:
|
| 658 |
+
return 0
|
| 659 |
+
return sum(values_list) / len(values_list)
|
| 660 |
+
|
| 661 |
+
def get_interview_assessment(self):
|
| 662 |
+
"""
|
| 663 |
+
Analyze body language patterns in the context of an interview.
|
| 664 |
+
|
| 665 |
+
Returns:
|
| 666 |
+
dict: Assessment of body language with interview-specific insights
|
| 667 |
+
"""
|
| 668 |
+
stats = self.get_stats()
|
| 669 |
+
|
| 670 |
+
# Initialize assessment
|
| 671 |
+
assessment = {
|
| 672 |
+
'confidence_score': 0, # 0-10 scale
|
| 673 |
+
'engagement_score': 0, # 0-10 scale
|
| 674 |
+
'comfort_score': 0, # 0-10 scale
|
| 675 |
+
'overall_score': 0, # 0-10 scale
|
| 676 |
+
'strengths': [],
|
| 677 |
+
'areas_for_improvement': [],
|
| 678 |
+
'recommendations': []
|
| 679 |
+
}
|
| 680 |
+
|
| 681 |
+
# CONFIDENCE SCORE
|
| 682 |
+
confidence_base = 7 # Start from a neutral-positive point
|
| 683 |
+
|
| 684 |
+
# Positive indicators of confidence
|
| 685 |
+
if stats['leaning_forward_percentage'] > 40:
|
| 686 |
+
confidence_base += 1
|
| 687 |
+
assessment['strengths'].append('Shows engagement by leaning forward')
|
| 688 |
+
|
| 689 |
+
if stats['hand_movement_percentage'] > 30 and stats['hand_movement_percentage'] < 70:
|
| 690 |
+
confidence_base += 1
|
| 691 |
+
assessment['strengths'].append('Uses appropriate hand gestures to emphasize points')
|
| 692 |
+
|
| 693 |
+
# Negative indicators
|
| 694 |
+
if stats['shoulder_misalignment_percentage'] > 30:
|
| 695 |
+
confidence_base -= 1
|
| 696 |
+
assessment['areas_for_improvement'].append('Uneven shoulders may convey tension')
|
| 697 |
+
assessment['recommendations'].append('Practice maintaining level shoulders')
|
| 698 |
+
|
| 699 |
+
if stats['self_touch_percentage'] > 30:
|
| 700 |
+
confidence_base -= 2
|
| 701 |
+
assessment['areas_for_improvement'].append('Frequent self-touching can signal nervousness')
|
| 702 |
+
assessment['recommendations'].append('Be mindful of touching your face or hair during interviews')
|
| 703 |
+
|
| 704 |
+
if stats['fidgeting_percentage'] > 40:
|
| 705 |
+
confidence_base -= 2
|
| 706 |
+
assessment['areas_for_improvement'].append('Fidgeting can distract from your message')
|
| 707 |
+
assessment['recommendations'].append('Practice stillness or channel energy into purposeful gestures')
|
| 708 |
+
|
| 709 |
+
if stats['arms_crossed_percentage'] > 50:
|
| 710 |
+
confidence_base -= 1
|
| 711 |
+
assessment['areas_for_improvement'].append('Frequently crossed arms can appear defensive')
|
| 712 |
+
assessment['recommendations'].append('Try to maintain a more open posture during interviews')
|
| 713 |
+
|
| 714 |
+
# Clamp confidence score to 0-10 range
|
| 715 |
+
assessment['confidence_score'] = max(0, min(10, confidence_base))
|
| 716 |
+
|
| 717 |
+
# ENGAGEMENT SCORE
|
| 718 |
+
engagement_base = 5 # Start from a neutral point
|
| 719 |
+
|
| 720 |
+
# Positive indicators of engagement
|
| 721 |
+
if stats['leaning_forward_percentage'] > 50:
|
| 722 |
+
engagement_base += 2
|
| 723 |
+
if 'Shows engagement by leaning forward' not in assessment['strengths']:
|
| 724 |
+
assessment['strengths'].append('Shows engagement by leaning forward')
|
| 725 |
+
|
| 726 |
+
if stats['hand_movement_percentage'] > 40:
|
| 727 |
+
engagement_base += 1
|
| 728 |
+
if 'Uses appropriate hand gestures to emphasize points' not in assessment['strengths']:
|
| 729 |
+
assessment['strengths'].append('Uses appropriate hand gestures to emphasize points')
|
| 730 |
+
|
| 731 |
+
# Negative indicators
|
| 732 |
+
if stats['pose_shifts_per_minute'] > 3:
|
| 733 |
+
engagement_base -= 1
|
| 734 |
+
assessment['areas_for_improvement'].append('Frequent posture shifts may indicate restlessness')
|
| 735 |
+
assessment['recommendations'].append('Work on maintaining a stable but comfortable posture')
|
| 736 |
+
|
| 737 |
+
if stats['arms_crossed_percentage'] > 60:
|
| 738 |
+
engagement_base -= 2
|
| 739 |
+
if 'Frequently crossed arms can appear defensive' not in assessment['areas_for_improvement']:
|
| 740 |
+
assessment['areas_for_improvement'].append('Crossed arms can signal disengagement or defensiveness')
|
| 741 |
+
|
| 742 |
+
# Clamp engagement score to 0-10 range
|
| 743 |
+
assessment['engagement_score'] = max(0, min(10, engagement_base))
|
| 744 |
+
|
| 745 |
+
# COMFORT SCORE
|
| 746 |
+
comfort_base = 6 # Start from a slightly positive point
|
| 747 |
+
|
| 748 |
+
# Negative indicators of comfort
|
| 749 |
+
if stats['fidgeting_percentage'] > 30:
|
| 750 |
+
comfort_base -= 1
|
| 751 |
+
if 'Fidgeting can distract from your message' not in assessment['areas_for_improvement']:
|
| 752 |
+
assessment['areas_for_improvement'].append('Fidgeting indicates nervousness or discomfort')
|
| 753 |
+
|
| 754 |
+
if stats['self_touch_percentage'] > 40:
|
| 755 |
+
comfort_base -= 1
|
| 756 |
+
if 'Frequent self-touching can signal nervousness' not in assessment['areas_for_improvement']:
|
| 757 |
+
assessment['areas_for_improvement'].append('Self-touching often indicates anxiety or discomfort')
|
| 758 |
+
|
| 759 |
+
if stats['pose_shifts_count'] > (stats['total_duration_seconds'] / 20): # More than 1 shift per 20 seconds
|
| 760 |
+
comfort_base -= 1
|
| 761 |
+
if 'Frequent posture shifts may indicate restlessness' not in assessment['areas_for_improvement']:
|
| 762 |
+
assessment['areas_for_improvement'].append('Frequent posture adjustments suggest discomfort')
|
| 763 |
+
assessment['recommendations'].append('Find a comfortable seated position before the interview')
|
| 764 |
+
|
| 765 |
+
# Positive indicators of comfort
|
| 766 |
+
if stats['shoulder_misalignment_percentage'] < 20:
|
| 767 |
+
comfort_base += 1
|
| 768 |
+
assessment['strengths'].append('Maintains balanced, relaxed shoulder posture')
|
| 769 |
+
|
| 770 |
+
if stats['fidgeting_percentage'] < 15 and stats['self_touch_percentage'] < 15:
|
| 771 |
+
comfort_base += 2
|
| 772 |
+
assessment['strengths'].append('Appears calm and composed through minimal nervous movements')
|
| 773 |
+
|
| 774 |
+
# Clamp comfort score to 0-10 range
|
| 775 |
+
assessment['comfort_score'] = max(0, min(10, comfort_base))
|
| 776 |
+
|
| 777 |
+
# OVERALL SCORE - weighted average of the three component scores
|
| 778 |
+
assessment['overall_score'] = (
|
| 779 |
+
assessment['confidence_score'] * 0.4 +
|
| 780 |
+
assessment['engagement_score'] * 0.4 +
|
| 781 |
+
assessment['comfort_score'] * 0.2
|
| 782 |
+
)
|
| 783 |
+
|
| 784 |
+
# Add general recommendations if none were added
|
| 785 |
+
if not assessment['recommendations']:
|
| 786 |
+
assessment['recommendations'] = [
|
| 787 |
+
'Practice interviews with video recording to observe your body language',
|
| 788 |
+
'Focus on maintaining an open, engaged posture',
|
| 789 |
+
'Use purposeful hand gestures to emphasize key points'
|
| 790 |
+
]
|
| 791 |
+
|
| 792 |
+
# Add general strengths if none were identified
|
| 793 |
+
if not assessment['strengths']:
|
| 794 |
+
assessment['strengths'] = [
|
| 795 |
+
'Shows baseline appropriate interview body language',
|
| 796 |
+
'Maintains basic professional demeanor'
|
| 797 |
+
]
|
| 798 |
+
|
| 799 |
+
return assessment
|
| 800 |
+
|
| 801 |
+
|
| 802 |
+
def analyze_body_language(frame, analyzer=None, annotate=False):
|
| 803 |
+
"""
|
| 804 |
+
Analyze body language in a single frame.
|
| 805 |
+
|
| 806 |
+
Args:
|
| 807 |
+
frame: The video frame (BGR format)
|
| 808 |
+
analyzer: An existing BodyLanguageAnalyzer instance, or None to create a new one
|
| 809 |
+
annotate: Whether to annotate the frame with visualization
|
| 810 |
+
|
| 811 |
+
Returns:
|
| 812 |
+
tuple: (metrics, analyzer, annotated_frame)
|
| 813 |
+
- metrics: Dictionary of body language metrics for this frame
|
| 814 |
+
- analyzer: The BodyLanguageAnalyzer instance (new or updated)
|
| 815 |
+
- annotated_frame: The frame with annotations if requested
|
| 816 |
+
"""
|
| 817 |
+
if analyzer is None:
|
| 818 |
+
analyzer = BodyLanguageAnalyzer()
|
| 819 |
+
|
| 820 |
+
metrics, annotated_frame = analyzer.process_frame(frame, annotate)
|
| 821 |
+
return metrics, analyzer, annotated_frame
|
| 822 |
+
|
| 823 |
+
|
| 824 |
+
class InterviewAnalyzer:
|
| 825 |
+
"""
|
| 826 |
+
Combined analyzer for comprehensive interview assessment including
|
| 827 |
+
eye contact and body language.
|
| 828 |
+
"""
|
| 829 |
+
def __init__(self):
|
| 830 |
+
self.eye_contact_analyzer = EyeContactAnalyzer()
|
| 831 |
+
self.body_language_analyzer = BodyLanguageAnalyzer()
|
| 832 |
+
self.total_frames = 0
|
| 833 |
+
self.start_time = time.time()
|
| 834 |
+
self.frame_metrics = []
|
| 835 |
+
|
| 836 |
+
def reset(self):
|
| 837 |
+
"""Reset all analyzers for a new session."""
|
| 838 |
+
self.eye_contact_analyzer.reset_stats()
|
| 839 |
+
self.body_language_analyzer.reset_stats()
|
| 840 |
+
self.total_frames = 0
|
| 841 |
+
self.start_time = time.time()
|
| 842 |
+
self.frame_metrics = []
|
| 843 |
+
|
| 844 |
+
def process_frame(self, frame, annotate=False):
|
| 845 |
+
"""
|
| 846 |
+
Process a frame through both eye contact and body language analyzers.
|
| 847 |
+
|
| 848 |
+
Args:
|
| 849 |
+
frame: The video frame (BGR format)
|
| 850 |
+
annotate: Whether to annotate the frame with visualization
|
| 851 |
+
|
| 852 |
+
Returns:
|
| 853 |
+
tuple: (combined_metrics, annotated_frame)
|
| 854 |
+
"""
|
| 855 |
+
self.total_frames += 1
|
| 856 |
+
|
| 857 |
+
# Process with eye contact analyzer
|
| 858 |
+
eye_metrics, _, _ = analyze_eye_contact(frame, self.eye_contact_analyzer, False)
|
| 859 |
+
|
| 860 |
+
# Process with body language analyzer
|
| 861 |
+
body_metrics, body_frame = self.body_language_analyzer.process_frame(frame, annotate)
|
| 862 |
+
|
| 863 |
+
# Combine metrics
|
| 864 |
+
combined_metrics = {**eye_metrics, **body_metrics}
|
| 865 |
+
combined_metrics['frame_number'] = self.total_frames
|
| 866 |
+
combined_metrics['timestamp'] = time.time()
|
| 867 |
+
|
| 868 |
+
# Store frame metrics for later analysis
|
| 869 |
+
self.frame_metrics.append(combined_metrics)
|
| 870 |
+
|
| 871 |
+
return combined_metrics, body_frame
|
| 872 |
+
|
| 873 |
+
def get_comprehensive_assessment(self):
|
| 874 |
+
"""
|
| 875 |
+
Get a comprehensive assessment combining eye contact and body language insights.
|
| 876 |
+
|
| 877 |
+
Returns:
|
| 878 |
+
dict: Combined assessment with overall interview performance metrics
|
| 879 |
+
"""
|
| 880 |
+
# Get individual assessments
|
| 881 |
+
eye_contact_stats = self.eye_contact_analyzer.get_stats()
|
| 882 |
+
eye_contact_assessment = self.eye_contact_analyzer.get_interview_assessment()
|
| 883 |
+
|
| 884 |
+
body_language_stats = self.body_language_analyzer.get_stats()
|
| 885 |
+
body_language_assessment = self.body_language_analyzer.get_interview_assessment()
|
| 886 |
+
|
| 887 |
+
# Create combined assessment
|
| 888 |
+
assessment = {
|
| 889 |
+
'overall_score': (eye_contact_assessment['score'] * 0.4 +
|
| 890 |
+
body_language_assessment['overall_score'] * 0.6),
|
| 891 |
+
'eye_contact': {
|
| 892 |
+
'score': eye_contact_assessment['score'],
|
| 893 |
+
'patterns': eye_contact_assessment['patterns'],
|
| 894 |
+
'recommendations': eye_contact_assessment['recommendations']
|
| 895 |
+
},
|
| 896 |
+
'body_language': {
|
| 897 |
+
'confidence_score': body_language_assessment['confidence_score'],
|
| 898 |
+
'engagement_score': body_language_assessment['engagement_score'],
|
| 899 |
+
'comfort_score': body_language_assessment['comfort_score'],
|
| 900 |
+
'strengths': body_language_assessment['strengths'],
|
| 901 |
+
'areas_for_improvement': body_language_assessment['areas_for_improvement'],
|
| 902 |
+
'recommendations': body_language_assessment['recommendations']
|
| 903 |
+
},
|
| 904 |
+
'key_statistics': {
|
| 905 |
+
'total_duration_seconds': time.time() - self.start_time,
|
| 906 |
+
'total_frames': self.total_frames,
|
| 907 |
+
'eye_contact_percentage': eye_contact_stats['eye_contact_percentage'],
|
| 908 |
+
'longest_eye_contact_seconds': eye_contact_stats['longest_eye_contact_seconds'],
|
| 909 |
+
'average_contact_duration_seconds': eye_contact_stats['average_contact_duration_seconds'],
|
| 910 |
+
'shoulder_misalignment_percentage': body_language_stats['shoulder_misalignment_percentage'],
|
| 911 |
+
'leaning_forward_percentage': body_language_stats['leaning_forward_percentage'],
|
| 912 |
+
'head_tilt_percentage': body_language_stats['head_tilt_percentage'],
|
| 913 |
+
'arms_crossed_percentage': body_language_stats['arms_crossed_percentage'],
|
| 914 |
+
'self_touch_percentage': body_language_stats['self_touch_percentage'],
|
| 915 |
+
'fidgeting_percentage': body_language_stats['fidgeting_percentage'],
|
| 916 |
+
'pose_shifts_per_minute': body_language_stats['pose_shifts_per_minute']
|
| 917 |
+
},
|
| 918 |
+
'processing_info': {
|
| 919 |
+
'device_used': DEVICE
|
| 920 |
+
}
|
| 921 |
+
}
|
| 922 |
+
|
| 923 |
+
# Generate overall assessment text
|
| 924 |
+
if assessment['overall_score'] >= 8.5:
|
| 925 |
+
assessment['overall_assessment'] = "Excellent interview presence. Your body language and eye contact project confidence and engagement."
|
| 926 |
+
elif assessment['overall_score'] >= 7:
|
| 927 |
+
assessment['overall_assessment'] = "Strong interview presence with some minor areas for improvement."
|
| 928 |
+
elif assessment['overall_score'] >= 5.5:
|
| 929 |
+
assessment['overall_assessment'] = "Adequate interview presence with several areas that could be strengthened."
|
| 930 |
+
else:
|
| 931 |
+
assessment['overall_assessment'] = "Your interview presence needs significant improvement to make a positive impression."
|
| 932 |
+
|
| 933 |
+
return assessment
|
| 934 |
+
|
| 935 |
+
|
| 936 |
+
def example_interview_assessment():
|
| 937 |
+
"""
|
| 938 |
+
Generate an example interview assessment for demonstration purposes.
|
| 939 |
+
|
| 940 |
+
Returns:
|
| 941 |
+
dict: Example assessment
|
| 942 |
+
"""
|
| 943 |
+
assessment = {
|
| 944 |
+
'overall_score': 7.8,
|
| 945 |
+
'overall_assessment': "Strong interview presence with some minor areas for improvement.",
|
| 946 |
+
'eye_contact': {
|
| 947 |
+
'score': 8.0,
|
| 948 |
+
'patterns': ["Good eye contact maintained throughout most of the interview"],
|
| 949 |
+
'recommendations': ["Slightly reduce the intensity of eye contact in some moments"]
|
| 950 |
+
},
|
| 951 |
+
'body_language': {
|
| 952 |
+
'confidence_score': 7.5,
|
| 953 |
+
'engagement_score': 8.0,
|
| 954 |
+
'comfort_score': 7.0,
|
| 955 |
+
'strengths': [
|
| 956 |
+
"Good upright posture",
|
| 957 |
+
"Appropriate hand gestures",
|
| 958 |
+
"Engaged facial expressions"
|
| 959 |
+
],
|
| 960 |
+
'areas_for_improvement': [
|
| 961 |
+
"Occasional fidgeting",
|
| 962 |
+
"Some tension in shoulders"
|
| 963 |
+
],
|
| 964 |
+
'recommendations': [
|
| 965 |
+
"Practice relaxation techniques before interviews",
|
| 966 |
+
"Be mindful of hand movements when nervous",
|
| 967 |
+
"Maintain balanced posture throughout"
|
| 968 |
+
]
|
| 969 |
+
},
|
| 970 |
+
'key_statistics': {
|
| 971 |
+
'total_duration_seconds': 300.0,
|
| 972 |
+
'total_frames': 9000,
|
| 973 |
+
'eye_contact_percentage': 65.0,
|
| 974 |
+
'longest_eye_contact_seconds': 8.5,
|
| 975 |
+
'average_contact_duration_seconds': 4.2,
|
| 976 |
+
'shoulder_misalignment_percentage': 85.0,
|
| 977 |
+
'leaning_forward_percentage': 40.0,
|
| 978 |
+
'head_tilt_percentage': 15.0,
|
| 979 |
+
'arms_crossed_percentage': 10.0,
|
| 980 |
+
'self_touch_percentage': 25.0,
|
| 981 |
+
'fidgeting_percentage': 30.0,
|
| 982 |
+
'pose_shifts_per_minute': 2.5
|
| 983 |
+
},
|
| 984 |
+
'processing_info': {
|
| 985 |
+
'device_used': DEVICE
|
| 986 |
+
}
|
| 987 |
+
}
|
| 988 |
+
|
| 989 |
+
print("\n=== EXAMPLE INTERVIEW ASSESSMENT ===")
|
| 990 |
+
print(f"Overall Score: {assessment['overall_score']}/10")
|
| 991 |
+
print(f"Assessment: {assessment['overall_assessment']}")
|
| 992 |
+
|
| 993 |
+
print("\nEYE CONTACT:")
|
| 994 |
+
print(f"Score: {assessment['eye_contact']['score']}/10")
|
| 995 |
+
for pattern in assessment['eye_contact']['patterns']:
|
| 996 |
+
print(f"- {pattern}")
|
| 997 |
+
|
| 998 |
+
print("\nBODY LANGUAGE:")
|
| 999 |
+
print(f"Confidence Score: {assessment['body_language']['confidence_score']}/10")
|
| 1000 |
+
print(f"Engagement Score: {assessment['body_language']['engagement_score']}/10")
|
| 1001 |
+
print(f"Comfort Score: {assessment['body_language']['comfort_score']}/10")
|
| 1002 |
+
|
| 1003 |
+
print("\nSTRENGTHS:")
|
| 1004 |
+
for strength in assessment['body_language']['strengths']:
|
| 1005 |
+
print(f"+ {strength}")
|
| 1006 |
+
|
| 1007 |
+
print("\nAREAS FOR IMPROVEMENT:")
|
| 1008 |
+
for area in assessment['body_language']['areas_for_improvement']:
|
| 1009 |
+
print(f"- {area}")
|
| 1010 |
+
|
| 1011 |
+
print("\nPRIORITY RECOMMENDATIONS:")
|
| 1012 |
+
for i, rec in enumerate(assessment['body_language']['recommendations'], 1):
|
| 1013 |
+
print(f"{i}. {rec}")
|
| 1014 |
+
|
| 1015 |
+
return assessment
|
| 1016 |
+
|
| 1017 |
+
|
| 1018 |
+
def analyze_video_file(video_path, display_video=False, save_results=False):
|
| 1019 |
+
"""
|
| 1020 |
+
Analyze body language in a video file and get statistics.
|
| 1021 |
+
|
| 1022 |
+
Args:
|
| 1023 |
+
video_path: Path to the video file
|
| 1024 |
+
display_video: Whether to display the video during analysis
|
| 1025 |
+
save_results: Whether to save results to a JSON file
|
| 1026 |
+
|
| 1027 |
+
Returns:
|
| 1028 |
+
dict: Body language statistics and assessment
|
| 1029 |
+
"""
|
| 1030 |
+
# Open the video file
|
| 1031 |
+
cap = cv2.VideoCapture(video_path)
|
| 1032 |
+
if not cap.isOpened():
|
| 1033 |
+
print(f"Error: Could not open video file {video_path}")
|
| 1034 |
+
return None
|
| 1035 |
+
|
| 1036 |
+
# Get video properties
|
| 1037 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 1038 |
+
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 1039 |
+
duration = frame_count / fps if fps > 0 else 0
|
| 1040 |
+
|
| 1041 |
+
# Initialize analyzer
|
| 1042 |
+
analyzer = BodyLanguageAnalyzer()
|
| 1043 |
+
frame_number = 0
|
| 1044 |
+
|
| 1045 |
+
# Process each frame
|
| 1046 |
+
while cap.isOpened():
|
| 1047 |
+
ret, frame = cap.read()
|
| 1048 |
+
if not ret:
|
| 1049 |
+
break
|
| 1050 |
+
|
| 1051 |
+
# Process the frame
|
| 1052 |
+
metrics, analyzer, annotated_frame = analyze_body_language(frame, analyzer, display_video)
|
| 1053 |
+
|
| 1054 |
+
# Display progress
|
| 1055 |
+
frame_number += 1
|
| 1056 |
+
|
| 1057 |
+
# Display the frame if requested
|
| 1058 |
+
if display_video:
|
| 1059 |
+
cv2.imshow("Body Language Analysis", annotated_frame)
|
| 1060 |
+
|
| 1061 |
+
# Break if 'q' is pressed
|
| 1062 |
+
if cv2.waitKey(1) & 0xFF == ord('q'):
|
| 1063 |
+
break
|
| 1064 |
+
|
| 1065 |
+
# Clean up
|
| 1066 |
+
cap.release()
|
| 1067 |
+
if display_video:
|
| 1068 |
+
cv2.destroyAllWindows()
|
| 1069 |
+
|
| 1070 |
+
# Get statistics and assessment
|
| 1071 |
+
stats = analyzer.get_stats()
|
| 1072 |
+
assessment = analyzer.get_interview_assessment()
|
| 1073 |
+
|
| 1074 |
+
# Combine results
|
| 1075 |
+
results = {
|
| 1076 |
+
"video_info": {
|
| 1077 |
+
"path": video_path,
|
| 1078 |
+
"frames": frame_count,
|
| 1079 |
+
"fps": fps,
|
| 1080 |
+
"duration_seconds": duration,
|
| 1081 |
+
"device_used": DEVICE
|
| 1082 |
+
},
|
| 1083 |
+
"body_language_stats": stats,
|
| 1084 |
+
"assessment": assessment
|
| 1085 |
+
}
|
| 1086 |
+
|
| 1087 |
+
# Save results if requested
|
| 1088 |
+
if save_results:
|
| 1089 |
+
from datetime import datetime
|
| 1090 |
+
output_dir = os.path.join(os.path.dirname(video_path), "results")
|
| 1091 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 1092 |
+
output_file = f"{output_dir}/{Path(video_path).stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_body_language_analysis.json"
|
| 1093 |
+
with open(output_file, 'w') as f:
|
| 1094 |
+
json.dump(results, f, indent=4)
|
| 1095 |
+
|
| 1096 |
+
return results
|
| 1097 |
+
|
| 1098 |
+
|
| 1099 |
+
if __name__ == "__main__":
|
| 1100 |
+
example_interview_assessment()
|
behavior_backend/app/services/processing/emotion_analyzer.py
ADDED
|
@@ -0,0 +1,1733 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import cv2
|
| 3 |
+
import time
|
| 4 |
+
import json
|
| 5 |
+
import numpy as np
|
| 6 |
+
import hashlib
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Dict, Any, List, Tuple, Optional
|
| 9 |
+
from deepface import DeepFace
|
| 10 |
+
from collections import deque, OrderedDict
|
| 11 |
+
import torch
|
| 12 |
+
import torch.nn as nn
|
| 13 |
+
import torch.nn.functional as F
|
| 14 |
+
import mediapipe as mp
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Fix import paths
|
| 18 |
+
try:
|
| 19 |
+
from app.utils.logging_utils import time_it, setup_logger
|
| 20 |
+
from app.utils.device_utils import device, run_on_device, get_available_device
|
| 21 |
+
except ImportError:
|
| 22 |
+
# Try relative imports for running from project root
|
| 23 |
+
from behavior_backend.app.utils.logging_utils import time_it, setup_logger
|
| 24 |
+
from behavior_backend.app.utils.device_utils import device, run_on_device, get_available_device
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Configure logging
|
| 28 |
+
logger = setup_logger(__name__)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# Initialize device once at module level
|
| 32 |
+
DEVICE = get_available_device()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class LRUCache:
|
| 36 |
+
"""
|
| 37 |
+
LRU Cache implementation for caching analysis results.
|
| 38 |
+
This reduces redundant computation on identical frames or faces.
|
| 39 |
+
"""
|
| 40 |
+
def __init__(self, maxsize=128):
|
| 41 |
+
self.cache = OrderedDict()
|
| 42 |
+
self.maxsize = maxsize
|
| 43 |
+
self.hits = 0
|
| 44 |
+
self.misses = 0
|
| 45 |
+
|
| 46 |
+
def __getitem__(self, key):
|
| 47 |
+
if key in self.cache:
|
| 48 |
+
self.hits += 1
|
| 49 |
+
value = self.cache.pop(key)
|
| 50 |
+
self.cache[key] = value
|
| 51 |
+
return value
|
| 52 |
+
self.misses += 1
|
| 53 |
+
raise KeyError(key)
|
| 54 |
+
|
| 55 |
+
def __setitem__(self, key, value):
|
| 56 |
+
if key in self.cache:
|
| 57 |
+
self.cache.pop(key)
|
| 58 |
+
elif len(self.cache) >= self.maxsize:
|
| 59 |
+
self.cache.popitem(last=False)
|
| 60 |
+
self.cache[key] = value
|
| 61 |
+
|
| 62 |
+
def __contains__(self, key):
|
| 63 |
+
return key in self.cache
|
| 64 |
+
|
| 65 |
+
def get(self, key, default=None):
|
| 66 |
+
try:
|
| 67 |
+
return self[key]
|
| 68 |
+
except KeyError:
|
| 69 |
+
return default
|
| 70 |
+
|
| 71 |
+
def get_stats(self):
|
| 72 |
+
total = self.hits + self.misses
|
| 73 |
+
hit_rate = (self.hits / total * 100) if total > 0 else 0
|
| 74 |
+
return {
|
| 75 |
+
"hits": self.hits,
|
| 76 |
+
"misses": self.misses,
|
| 77 |
+
"hit_rate": hit_rate,
|
| 78 |
+
"size": len(self.cache),
|
| 79 |
+
"maxsize": self.maxsize
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
class EmotionAnalyzer:
|
| 84 |
+
"""Service for emotion analysis operations."""
|
| 85 |
+
|
| 86 |
+
def __init__(self,
|
| 87 |
+
min_face_size_ratio: float = 0.05,
|
| 88 |
+
max_face_size_ratio: float = 0.95,
|
| 89 |
+
min_confidence: float = 0.4,
|
| 90 |
+
face_aspect_ratio_range: Tuple[float, float] = (0.4, 2.0),
|
| 91 |
+
iou_threshold: float = 0.3,
|
| 92 |
+
min_detection_persistence: int = 2,
|
| 93 |
+
max_face_movement: float = 0.3,
|
| 94 |
+
center_face_priority: bool = True,
|
| 95 |
+
emotion_smoothing_window: int = 5,
|
| 96 |
+
emotion_confidence_threshold: float = 20.0,
|
| 97 |
+
emotion_stability_threshold: float = 0.4,
|
| 98 |
+
enable_cache: bool = True,
|
| 99 |
+
cache_size: int = 128,
|
| 100 |
+
batch_size: int = 4,
|
| 101 |
+
skip_similar_frames: bool = True):
|
| 102 |
+
"""Initialize the emotion analyzer with robustness parameters."""
|
| 103 |
+
self.backends = {
|
| 104 |
+
'opencv': self._analyze_opencv,
|
| 105 |
+
'mediapipe': self._analyze_mediapipe,
|
| 106 |
+
'mtcnn': self._analyze_mtcnn,
|
| 107 |
+
'ssd': self._analyze_ssd,
|
| 108 |
+
'retinaface': self._analyze_retinaface
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
# Parameters for robust face detection
|
| 112 |
+
self.min_face_size_ratio = min_face_size_ratio
|
| 113 |
+
self.max_face_size_ratio = max_face_size_ratio
|
| 114 |
+
self.min_confidence = min_confidence
|
| 115 |
+
self.face_aspect_ratio_range = face_aspect_ratio_range
|
| 116 |
+
self.iou_threshold = iou_threshold
|
| 117 |
+
self.min_detection_persistence = min_detection_persistence
|
| 118 |
+
self.max_face_movement = max_face_movement
|
| 119 |
+
self.center_face_priority = center_face_priority
|
| 120 |
+
|
| 121 |
+
# Parameters for emotion stability
|
| 122 |
+
self.emotion_smoothing_window = emotion_smoothing_window
|
| 123 |
+
self.emotion_confidence_threshold = emotion_confidence_threshold
|
| 124 |
+
self.emotion_stability_threshold = emotion_stability_threshold
|
| 125 |
+
|
| 126 |
+
# Performance optimization parameters
|
| 127 |
+
self.enable_cache = enable_cache
|
| 128 |
+
self.batch_size = batch_size
|
| 129 |
+
self.skip_similar_frames = skip_similar_frames
|
| 130 |
+
|
| 131 |
+
# Face tracking state
|
| 132 |
+
self.previous_faces = []
|
| 133 |
+
self.face_history = []
|
| 134 |
+
self.frame_count = 0
|
| 135 |
+
self.main_face_id = None
|
| 136 |
+
self.emotion_history = {}
|
| 137 |
+
self.last_stable_emotion = None
|
| 138 |
+
self.emotion_stability_count = {}
|
| 139 |
+
|
| 140 |
+
# Cache for results
|
| 141 |
+
if self.enable_cache:
|
| 142 |
+
self.frame_cache = LRUCache(maxsize=cache_size)
|
| 143 |
+
self.emotion_cache = LRUCache(maxsize=cache_size)
|
| 144 |
+
self.face_cache = LRUCache(maxsize=cache_size)
|
| 145 |
+
|
| 146 |
+
# Initialize and cache models
|
| 147 |
+
self._init_face_detection()
|
| 148 |
+
|
| 149 |
+
# Cache for preprocessed frames
|
| 150 |
+
self.last_frame = None
|
| 151 |
+
self.last_processed_frame = None
|
| 152 |
+
self.last_frame_hash = None
|
| 153 |
+
|
| 154 |
+
# Initialize CLAHE once
|
| 155 |
+
self.clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 156 |
+
|
| 157 |
+
# Pre-compute gamma lookup table
|
| 158 |
+
self.gamma_lut = np.empty((1,256), np.uint8)
|
| 159 |
+
gamma = 1.2
|
| 160 |
+
for i in range(256):
|
| 161 |
+
self.gamma_lut[0,i] = np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255)
|
| 162 |
+
|
| 163 |
+
# Check if CUDA is available for batch processing
|
| 164 |
+
self.cuda_available = torch.cuda.is_available() and DEVICE == 'cuda'
|
| 165 |
+
if self.cuda_available:
|
| 166 |
+
logger.info("CUDA is available for batch processing")
|
| 167 |
+
else:
|
| 168 |
+
logger.info(f"CUDA is not available, using {DEVICE} for processing")
|
| 169 |
+
|
| 170 |
+
# Initialize parallel processing pool if available
|
| 171 |
+
try:
|
| 172 |
+
import multiprocessing
|
| 173 |
+
self.n_processors = min(multiprocessing.cpu_count(), 4) # Limit to 4 cores
|
| 174 |
+
self.use_multiprocessing = self.n_processors > 1 and not self.cuda_available
|
| 175 |
+
if self.use_multiprocessing:
|
| 176 |
+
logger.info(f"Multiprocessing enabled with {self.n_processors} processors")
|
| 177 |
+
except:
|
| 178 |
+
self.use_multiprocessing = False
|
| 179 |
+
logger.warning("Multiprocessing initialization failed, using sequential processing")
|
| 180 |
+
|
| 181 |
+
def _init_face_detection(self):
|
| 182 |
+
"""Initialize face detection models with optimized parameters."""
|
| 183 |
+
self.mp_face_detection = mp.solutions.face_detection
|
| 184 |
+
self.mp_drawing = mp.solutions.drawing_utils
|
| 185 |
+
|
| 186 |
+
# Initialize MediaPipe Face Detection with optimized parameters
|
| 187 |
+
self.face_detection = self.mp_face_detection.FaceDetection(
|
| 188 |
+
model_selection=1, # Use full-range model
|
| 189 |
+
min_detection_confidence=self.min_confidence
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
# Initialize OpenCV face cascade for backup
|
| 193 |
+
self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
|
| 194 |
+
|
| 195 |
+
def _preprocess_frame(self, frame: np.ndarray) -> np.ndarray:
|
| 196 |
+
"""
|
| 197 |
+
Optimized preprocessing for better face detection with frame caching.
|
| 198 |
+
"""
|
| 199 |
+
# Generate a hash for the frame to check cache
|
| 200 |
+
if self.enable_cache:
|
| 201 |
+
# Compute hash only on a downscaled grayscale version for efficiency
|
| 202 |
+
small_frame = cv2.resize(frame, (32, 32))
|
| 203 |
+
gray_small = cv2.cvtColor(small_frame, cv2.COLOR_BGR2GRAY)
|
| 204 |
+
frame_hash = hashlib.md5(gray_small.tobytes()).hexdigest()
|
| 205 |
+
|
| 206 |
+
# Check if this is the same as the last frame
|
| 207 |
+
if frame_hash == self.last_frame_hash:
|
| 208 |
+
return self.last_processed_frame
|
| 209 |
+
|
| 210 |
+
# Check if we have this frame in cache
|
| 211 |
+
cached_result = self.frame_cache.get(frame_hash)
|
| 212 |
+
if cached_result is not None:
|
| 213 |
+
return cached_result
|
| 214 |
+
|
| 215 |
+
self.last_frame_hash = frame_hash
|
| 216 |
+
# Check if this frame was already processed (for back-compatibility)
|
| 217 |
+
elif self.last_frame is not None and np.array_equal(frame, self.last_frame):
|
| 218 |
+
return self.last_processed_frame
|
| 219 |
+
|
| 220 |
+
# Basic preprocessing only - full preprocessing moved to backup path
|
| 221 |
+
processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 222 |
+
|
| 223 |
+
# Cache the results
|
| 224 |
+
self.last_frame = frame.copy()
|
| 225 |
+
self.last_processed_frame = processed
|
| 226 |
+
|
| 227 |
+
# Add to cache if enabled
|
| 228 |
+
if self.enable_cache:
|
| 229 |
+
self.frame_cache[frame_hash] = processed
|
| 230 |
+
|
| 231 |
+
return processed
|
| 232 |
+
|
| 233 |
+
def _enhanced_preprocess_frame(self, frame: np.ndarray) -> np.ndarray:
|
| 234 |
+
"""
|
| 235 |
+
Enhanced preprocessing for backup detection path.
|
| 236 |
+
Only used when primary detection fails.
|
| 237 |
+
"""
|
| 238 |
+
# Convert to LAB color space
|
| 239 |
+
lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
|
| 240 |
+
l, a, b = cv2.split(lab)
|
| 241 |
+
|
| 242 |
+
# Apply CLAHE to L channel
|
| 243 |
+
cl = self.clahe.apply(l)
|
| 244 |
+
|
| 245 |
+
# Merge channels back
|
| 246 |
+
enhanced_lab = cv2.merge((cl, a, b))
|
| 247 |
+
enhanced = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
|
| 248 |
+
|
| 249 |
+
# Apply pre-computed gamma correction
|
| 250 |
+
gamma_corrected = cv2.LUT(enhanced, self.gamma_lut)
|
| 251 |
+
|
| 252 |
+
return gamma_corrected
|
| 253 |
+
|
| 254 |
+
def _smooth_emotions(self, face_id: int, emotions: Dict[str, float]) -> Dict[str, float]:
|
| 255 |
+
"""
|
| 256 |
+
Apply temporal smoothing to emotions to reduce fluctuations.
|
| 257 |
+
|
| 258 |
+
Args:
|
| 259 |
+
face_id: Identifier for the face
|
| 260 |
+
emotions: Current emotion scores
|
| 261 |
+
|
| 262 |
+
Returns:
|
| 263 |
+
Smoothed emotion scores
|
| 264 |
+
"""
|
| 265 |
+
# Initialize history for this face if not exists
|
| 266 |
+
if face_id not in self.emotion_history:
|
| 267 |
+
self.emotion_history[face_id] = deque(maxlen=self.emotion_smoothing_window)
|
| 268 |
+
|
| 269 |
+
# Add current emotions to history
|
| 270 |
+
self.emotion_history[face_id].append(emotions)
|
| 271 |
+
|
| 272 |
+
# If we don't have enough history, return current emotions
|
| 273 |
+
if len(self.emotion_history[face_id]) < 2:
|
| 274 |
+
return emotions
|
| 275 |
+
|
| 276 |
+
# Calculate smoothed emotions
|
| 277 |
+
smoothed = {}
|
| 278 |
+
for emotion in emotions:
|
| 279 |
+
# Get history of this emotion
|
| 280 |
+
values = [frame_emotions.get(emotion, 0) for frame_emotions in self.emotion_history[face_id]]
|
| 281 |
+
# Apply exponential weighting (more recent frames have higher weight)
|
| 282 |
+
weights = [0.6 ** i for i in range(len(values))]
|
| 283 |
+
weights.reverse() # Most recent frame gets highest weight
|
| 284 |
+
weighted_sum = sum(w * v for w, v in zip(weights, values))
|
| 285 |
+
weight_sum = sum(weights)
|
| 286 |
+
smoothed[emotion] = weighted_sum / weight_sum if weight_sum > 0 else 0
|
| 287 |
+
|
| 288 |
+
return smoothed
|
| 289 |
+
|
| 290 |
+
def _check_emotion_stability(self, emotions: Dict[str, float]) -> Tuple[str, float, bool]:
|
| 291 |
+
"""
|
| 292 |
+
Check if the dominant emotion is stable across frames.
|
| 293 |
+
|
| 294 |
+
Args:
|
| 295 |
+
emotions: Current emotion scores
|
| 296 |
+
|
| 297 |
+
Returns:
|
| 298 |
+
Tuple of (dominant_emotion, confidence, is_stable)
|
| 299 |
+
"""
|
| 300 |
+
if not emotions:
|
| 301 |
+
return "neutral", 0.0, False
|
| 302 |
+
|
| 303 |
+
# Get dominant emotion
|
| 304 |
+
dominant_emotion, confidence = max(emotions.items(), key=lambda x: x[1])
|
| 305 |
+
|
| 306 |
+
# Check if confidence is above threshold
|
| 307 |
+
if confidence < self.emotion_confidence_threshold:
|
| 308 |
+
return "neutral", confidence, False
|
| 309 |
+
|
| 310 |
+
# Initialize stability count for new emotions
|
| 311 |
+
for emotion in emotions:
|
| 312 |
+
if emotion not in self.emotion_stability_count:
|
| 313 |
+
self.emotion_stability_count[emotion] = 0
|
| 314 |
+
|
| 315 |
+
# Update stability counts
|
| 316 |
+
for emotion in self.emotion_stability_count:
|
| 317 |
+
if emotion == dominant_emotion:
|
| 318 |
+
self.emotion_stability_count[emotion] += 1
|
| 319 |
+
else:
|
| 320 |
+
self.emotion_stability_count[emotion] = max(0, self.emotion_stability_count[emotion] - 1)
|
| 321 |
+
|
| 322 |
+
# Check if dominant emotion is stable
|
| 323 |
+
is_stable = self.emotion_stability_count.get(dominant_emotion, 0) >= 3
|
| 324 |
+
|
| 325 |
+
# If stable, update last stable emotion
|
| 326 |
+
if is_stable:
|
| 327 |
+
self.last_stable_emotion = (dominant_emotion, confidence)
|
| 328 |
+
# If not stable but we have a last stable emotion, check if current confidence is close
|
| 329 |
+
elif self.last_stable_emotion:
|
| 330 |
+
last_emotion, last_confidence = self.last_stable_emotion
|
| 331 |
+
# If current dominant emotion is different but close in confidence to last stable
|
| 332 |
+
if (dominant_emotion != last_emotion and
|
| 333 |
+
abs(confidence - last_confidence) < self.emotion_stability_threshold * last_confidence):
|
| 334 |
+
# Keep the last stable emotion
|
| 335 |
+
return last_emotion, last_confidence, True
|
| 336 |
+
|
| 337 |
+
return dominant_emotion, confidence, is_stable
|
| 338 |
+
|
| 339 |
+
def _find_center_face(self, faces: List[Dict], img_shape: Tuple[int, int, int]) -> Dict:
|
| 340 |
+
"""
|
| 341 |
+
Find the face closest to the center of the frame.
|
| 342 |
+
|
| 343 |
+
Args:
|
| 344 |
+
faces: List of detected faces
|
| 345 |
+
img_shape: Image shape (height, width, channels)
|
| 346 |
+
|
| 347 |
+
Returns:
|
| 348 |
+
The face closest to the center, or None if no faces
|
| 349 |
+
"""
|
| 350 |
+
if not faces:
|
| 351 |
+
return None
|
| 352 |
+
|
| 353 |
+
img_height, img_width = img_shape[:2]
|
| 354 |
+
img_center_x = img_width / 2
|
| 355 |
+
img_center_y = img_height / 2
|
| 356 |
+
|
| 357 |
+
closest_face = None
|
| 358 |
+
min_distance = float('inf')
|
| 359 |
+
|
| 360 |
+
for face in faces:
|
| 361 |
+
face_box = face.get('face_box', [0, 0, 0, 0])
|
| 362 |
+
x, y, w, h = face_box
|
| 363 |
+
|
| 364 |
+
# Calculate center of face
|
| 365 |
+
face_center_x = x + w / 2
|
| 366 |
+
face_center_y = y + h / 2
|
| 367 |
+
|
| 368 |
+
# Calculate distance to image center
|
| 369 |
+
distance = np.sqrt((face_center_x - img_center_x)**2 + (face_center_y - img_center_y)**2)
|
| 370 |
+
|
| 371 |
+
# Update closest face
|
| 372 |
+
if distance < min_distance:
|
| 373 |
+
min_distance = distance
|
| 374 |
+
closest_face = face
|
| 375 |
+
# Add distance to center as metadata
|
| 376 |
+
closest_face['center_distance'] = distance
|
| 377 |
+
closest_face['center_distance_ratio'] = distance / np.sqrt(img_width**2 + img_height**2)
|
| 378 |
+
|
| 379 |
+
return closest_face
|
| 380 |
+
|
| 381 |
+
def _calculate_iou(self, box1: List[int], box2: List[int]) -> float:
|
| 382 |
+
"""Calculate Intersection over Union between two bounding boxes."""
|
| 383 |
+
x1, y1, w1, h1 = box1
|
| 384 |
+
x2, y2, w2, h2 = box2
|
| 385 |
+
|
| 386 |
+
# Calculate intersection coordinates
|
| 387 |
+
xi1 = max(x1, x2)
|
| 388 |
+
yi1 = max(y1, y2)
|
| 389 |
+
xi2 = min(x1 + w1, x2 + w2)
|
| 390 |
+
yi2 = min(y1 + h1, y2 + h2)
|
| 391 |
+
|
| 392 |
+
if xi2 <= xi1 or yi2 <= yi1:
|
| 393 |
+
return 0.0
|
| 394 |
+
|
| 395 |
+
# Calculate areas
|
| 396 |
+
intersection_area = (xi2 - xi1) * (yi2 - yi1)
|
| 397 |
+
box1_area = w1 * h1
|
| 398 |
+
box2_area = w2 * h2
|
| 399 |
+
union_area = box1_area + box2_area - intersection_area
|
| 400 |
+
|
| 401 |
+
return intersection_area / union_area if union_area > 0 else 0.0
|
| 402 |
+
|
| 403 |
+
def _is_valid_face(self, face_box: List[int], img_shape: Tuple[int, int, int],
|
| 404 |
+
confidence: float = None) -> bool:
|
| 405 |
+
"""
|
| 406 |
+
Validate if a detected face is likely to be a real face.
|
| 407 |
+
|
| 408 |
+
Args:
|
| 409 |
+
face_box: Face bounding box [x, y, w, h]
|
| 410 |
+
img_shape: Image shape (height, width, channels)
|
| 411 |
+
confidence: Detection confidence score if available
|
| 412 |
+
|
| 413 |
+
Returns:
|
| 414 |
+
bool: True if the face is valid, False otherwise
|
| 415 |
+
"""
|
| 416 |
+
x, y, w, h = face_box
|
| 417 |
+
img_height, img_width = img_shape[:2]
|
| 418 |
+
|
| 419 |
+
# Check confidence threshold
|
| 420 |
+
if confidence is not None and confidence < self.min_confidence:
|
| 421 |
+
# Special case for SSD backend which may return 0 confidence
|
| 422 |
+
# but still have valid face detections
|
| 423 |
+
if confidence == 0 and w > 0 and h > 0:
|
| 424 |
+
# For SSD, we'll rely on other validation checks instead of confidence
|
| 425 |
+
pass
|
| 426 |
+
else:
|
| 427 |
+
return False
|
| 428 |
+
|
| 429 |
+
# Check face size relative to image
|
| 430 |
+
face_area = w * h
|
| 431 |
+
img_area = img_width * img_height
|
| 432 |
+
face_ratio = face_area / img_area
|
| 433 |
+
|
| 434 |
+
if face_ratio < self.min_face_size_ratio or face_ratio > self.max_face_size_ratio:
|
| 435 |
+
return False
|
| 436 |
+
|
| 437 |
+
# Check face aspect ratio (width/height)
|
| 438 |
+
aspect_ratio = w / h if h > 0 else 0
|
| 439 |
+
min_ratio, max_ratio = self.face_aspect_ratio_range
|
| 440 |
+
|
| 441 |
+
if aspect_ratio < min_ratio or aspect_ratio > max_ratio:
|
| 442 |
+
return False
|
| 443 |
+
|
| 444 |
+
# Check if face is within image boundaries with some margin
|
| 445 |
+
margin = 5
|
| 446 |
+
if (x < -margin or y < -margin or
|
| 447 |
+
x + w > img_width + margin or
|
| 448 |
+
y + h > img_height + margin):
|
| 449 |
+
return False
|
| 450 |
+
|
| 451 |
+
return True
|
| 452 |
+
|
| 453 |
+
def _check_temporal_consistency(self, current_faces: List[Dict], img_shape: Tuple[int, int, int]) -> List[Dict]:
|
| 454 |
+
"""
|
| 455 |
+
Filter faces based on temporal consistency with previous frames.
|
| 456 |
+
|
| 457 |
+
Args:
|
| 458 |
+
current_faces: List of detected faces in current frame
|
| 459 |
+
img_shape: Image shape
|
| 460 |
+
|
| 461 |
+
Returns:
|
| 462 |
+
List of validated faces
|
| 463 |
+
"""
|
| 464 |
+
self.frame_count += 1
|
| 465 |
+
img_width, img_height = img_shape[1], img_shape[0]
|
| 466 |
+
max_movement = self.max_face_movement * max(img_width, img_height)
|
| 467 |
+
|
| 468 |
+
# Initialize face tracking if this is the first frame
|
| 469 |
+
if not self.face_history:
|
| 470 |
+
self.face_history = [{
|
| 471 |
+
'face': face,
|
| 472 |
+
'persistence': 1,
|
| 473 |
+
'last_position': face['face_box'],
|
| 474 |
+
'stable': False,
|
| 475 |
+
'face_id': i # Assign unique ID to each face
|
| 476 |
+
} for i, face in enumerate(current_faces) if self._is_valid_face(face['face_box'], img_shape)]
|
| 477 |
+
|
| 478 |
+
# If center face priority is enabled, find the center face
|
| 479 |
+
if self.center_face_priority and current_faces:
|
| 480 |
+
center_face = self._find_center_face(current_faces, img_shape)
|
| 481 |
+
if center_face:
|
| 482 |
+
# Mark this as the main face
|
| 483 |
+
for i, tracked in enumerate(self.face_history):
|
| 484 |
+
if tracked['face'] == center_face:
|
| 485 |
+
self.main_face_id = tracked['face_id']
|
| 486 |
+
break
|
| 487 |
+
|
| 488 |
+
return current_faces
|
| 489 |
+
|
| 490 |
+
# Match current faces with tracking history
|
| 491 |
+
matched_faces = []
|
| 492 |
+
unmatched_current = current_faces.copy()
|
| 493 |
+
updated_history = []
|
| 494 |
+
|
| 495 |
+
for tracked_face in self.face_history:
|
| 496 |
+
best_match = None
|
| 497 |
+
best_iou = 0
|
| 498 |
+
best_match_idx = -1
|
| 499 |
+
|
| 500 |
+
# Find best matching face in current frame
|
| 501 |
+
for i, current_face in enumerate(unmatched_current):
|
| 502 |
+
if not self._is_valid_face(current_face['face_box'], img_shape):
|
| 503 |
+
continue
|
| 504 |
+
|
| 505 |
+
iou = self._calculate_iou(tracked_face['last_position'], current_face['face_box'])
|
| 506 |
+
|
| 507 |
+
# Check if movement is within allowed range
|
| 508 |
+
prev_center = (tracked_face['last_position'][0] + tracked_face['last_position'][2]/2,
|
| 509 |
+
tracked_face['last_position'][1] + tracked_face['last_position'][3]/2)
|
| 510 |
+
curr_center = (current_face['face_box'][0] + current_face['face_box'][2]/2,
|
| 511 |
+
current_face['face_box'][1] + current_face['face_box'][3]/2)
|
| 512 |
+
movement = np.sqrt((prev_center[0] - curr_center[0])**2 +
|
| 513 |
+
(prev_center[1] - curr_center[1])**2)
|
| 514 |
+
|
| 515 |
+
if iou > best_iou and iou >= self.iou_threshold and movement <= max_movement:
|
| 516 |
+
best_match = current_face
|
| 517 |
+
best_iou = iou
|
| 518 |
+
best_match_idx = i
|
| 519 |
+
|
| 520 |
+
if best_match:
|
| 521 |
+
# Update tracking info
|
| 522 |
+
persistence = tracked_face['persistence'] + 1
|
| 523 |
+
stable = persistence >= self.min_detection_persistence
|
| 524 |
+
|
| 525 |
+
# Apply emotion smoothing if emotions are present
|
| 526 |
+
if 'emotion' in best_match:
|
| 527 |
+
face_id = tracked_face['face_id']
|
| 528 |
+
best_match['emotion'] = self._smooth_emotions(face_id, best_match['emotion'])
|
| 529 |
+
|
| 530 |
+
# Add emotion stability information
|
| 531 |
+
dominant_emotion, confidence, is_stable = self._check_emotion_stability(best_match['emotion'])
|
| 532 |
+
best_match['dominant_emotion'] = dominant_emotion
|
| 533 |
+
best_match['emotion_confidence'] = confidence
|
| 534 |
+
best_match['emotion_stable'] = is_stable
|
| 535 |
+
|
| 536 |
+
updated_history.append({
|
| 537 |
+
'face': best_match,
|
| 538 |
+
'persistence': persistence,
|
| 539 |
+
'last_position': best_match['face_box'],
|
| 540 |
+
'stable': stable,
|
| 541 |
+
'face_id': tracked_face['face_id']
|
| 542 |
+
})
|
| 543 |
+
|
| 544 |
+
if stable:
|
| 545 |
+
matched_faces.append(best_match)
|
| 546 |
+
|
| 547 |
+
# Remove matched face from unmatched list
|
| 548 |
+
if best_match_idx != -1:
|
| 549 |
+
unmatched_current.pop(best_match_idx)
|
| 550 |
+
else:
|
| 551 |
+
# Face lost, reduce persistence
|
| 552 |
+
persistence = tracked_face['persistence'] - 1
|
| 553 |
+
if persistence > 0:
|
| 554 |
+
updated_history.append({
|
| 555 |
+
'face': tracked_face['face'],
|
| 556 |
+
'persistence': persistence,
|
| 557 |
+
'last_position': tracked_face['last_position'],
|
| 558 |
+
'stable': persistence >= self.min_detection_persistence,
|
| 559 |
+
'face_id': tracked_face['face_id']
|
| 560 |
+
})
|
| 561 |
+
|
| 562 |
+
# Add new unmatched faces to tracking
|
| 563 |
+
next_face_id = max([f['face_id'] for f in self.face_history], default=-1) + 1
|
| 564 |
+
for new_face in unmatched_current:
|
| 565 |
+
if self._is_valid_face(new_face['face_box'], img_shape):
|
| 566 |
+
updated_history.append({
|
| 567 |
+
'face': new_face,
|
| 568 |
+
'persistence': 1,
|
| 569 |
+
'last_position': new_face['face_box'],
|
| 570 |
+
'stable': False,
|
| 571 |
+
'face_id': next_face_id
|
| 572 |
+
})
|
| 573 |
+
next_face_id += 1
|
| 574 |
+
|
| 575 |
+
self.face_history = updated_history
|
| 576 |
+
|
| 577 |
+
# If center face priority is enabled, find the center face among stable faces
|
| 578 |
+
if self.center_face_priority and matched_faces:
|
| 579 |
+
center_face = self._find_center_face(matched_faces, img_shape)
|
| 580 |
+
if center_face:
|
| 581 |
+
# Mark this as the main face and put it first in the list
|
| 582 |
+
matched_faces.remove(center_face)
|
| 583 |
+
matched_faces.insert(0, center_face)
|
| 584 |
+
# Add a flag to indicate this is the main face
|
| 585 |
+
center_face['is_main_face'] = True
|
| 586 |
+
|
| 587 |
+
# Find the face_id for this center face
|
| 588 |
+
for tracked in self.face_history:
|
| 589 |
+
if tracked['face'] == center_face:
|
| 590 |
+
self.main_face_id = tracked['face_id']
|
| 591 |
+
break
|
| 592 |
+
|
| 593 |
+
# Return only stable faces
|
| 594 |
+
return matched_faces
|
| 595 |
+
|
| 596 |
+
@time_it
|
| 597 |
+
def analyze_frame(self, frame: np.ndarray, frame_index: int, backend: str = 'mediapipe') -> Dict[str, Any]:
|
| 598 |
+
"""
|
| 599 |
+
Analyze emotions in a video frame with caching and frame similarity detection.
|
| 600 |
+
|
| 601 |
+
Args:
|
| 602 |
+
frame: Video frame as numpy array
|
| 603 |
+
frame_index: Index of the frame
|
| 604 |
+
backend: Backend to use for face detection
|
| 605 |
+
|
| 606 |
+
Returns:
|
| 607 |
+
Dictionary with analysis results
|
| 608 |
+
"""
|
| 609 |
+
# Track total execution time
|
| 610 |
+
total_start_time = time.time()
|
| 611 |
+
|
| 612 |
+
# Track timing for each phase
|
| 613 |
+
timing_breakdown = {
|
| 614 |
+
'cache_check': 0,
|
| 615 |
+
'similarity_check': 0,
|
| 616 |
+
'face_detection': 0,
|
| 617 |
+
'emotion_analysis': 0,
|
| 618 |
+
'temporal_consistency': 0,
|
| 619 |
+
'misc_processing': 0
|
| 620 |
+
}
|
| 621 |
+
|
| 622 |
+
phase_start = time.time()
|
| 623 |
+
|
| 624 |
+
# 1. Check for identical frame in cache
|
| 625 |
+
if self.enable_cache:
|
| 626 |
+
# Create a fast hash for the frame
|
| 627 |
+
small_frame = cv2.resize(frame, (32, 32))
|
| 628 |
+
gray_small = cv2.cvtColor(small_frame, cv2.COLOR_BGR2GRAY)
|
| 629 |
+
frame_hash = hashlib.md5(gray_small.tobytes()).hexdigest()
|
| 630 |
+
|
| 631 |
+
# Check if we've already analyzed this exact frame
|
| 632 |
+
cache_key = f"{frame_hash}_{backend}"
|
| 633 |
+
cached_result = self.frame_cache.get(cache_key)
|
| 634 |
+
if cached_result is not None:
|
| 635 |
+
cached_result['from_cache'] = True
|
| 636 |
+
cached_result['frame_index'] = frame_index
|
| 637 |
+
|
| 638 |
+
# Update timings for cached result
|
| 639 |
+
cached_result['timing_breakdown'] = {
|
| 640 |
+
'cache_check': time.time() - phase_start,
|
| 641 |
+
'total': time.time() - total_start_time
|
| 642 |
+
}
|
| 643 |
+
|
| 644 |
+
return cached_result
|
| 645 |
+
|
| 646 |
+
timing_breakdown['cache_check'] = time.time() - phase_start
|
| 647 |
+
phase_start = time.time()
|
| 648 |
+
|
| 649 |
+
# 2. Check for similar frame if enabled
|
| 650 |
+
if self.skip_similar_frames and hasattr(self, 'last_frame_result') and frame_index > 0:
|
| 651 |
+
# Only check every 5 frames for similarity (to avoid overhead)
|
| 652 |
+
if frame_index % 5 == 0:
|
| 653 |
+
# Calculate frame difference using a fast method
|
| 654 |
+
if self.last_frame is not None:
|
| 655 |
+
# Resize for faster comparison
|
| 656 |
+
current_small = cv2.resize(frame, (64, 64))
|
| 657 |
+
last_small = cv2.resize(self.last_frame, (64, 64))
|
| 658 |
+
|
| 659 |
+
# Convert to grayscale
|
| 660 |
+
current_gray = cv2.cvtColor(current_small, cv2.COLOR_BGR2GRAY)
|
| 661 |
+
last_gray = cv2.cvtColor(last_small, cv2.COLOR_BGR2GRAY)
|
| 662 |
+
|
| 663 |
+
# Calculate absolute difference and mean
|
| 664 |
+
diff = cv2.absdiff(current_gray, last_gray)
|
| 665 |
+
mean_diff = np.mean(diff)
|
| 666 |
+
|
| 667 |
+
# If frames are very similar, reuse the previous result
|
| 668 |
+
if mean_diff < 3.0: # Threshold for similarity
|
| 669 |
+
result = self.last_frame_result.copy()
|
| 670 |
+
result['frame_index'] = frame_index
|
| 671 |
+
result['similar_to_previous'] = True
|
| 672 |
+
result['frame_difference'] = float(mean_diff)
|
| 673 |
+
|
| 674 |
+
# Update timing information
|
| 675 |
+
similarity_check_time = time.time() - phase_start
|
| 676 |
+
timing_breakdown['similarity_check'] = similarity_check_time
|
| 677 |
+
result['timing_breakdown'] = {
|
| 678 |
+
'cache_check': timing_breakdown['cache_check'],
|
| 679 |
+
'similarity_check': similarity_check_time,
|
| 680 |
+
'total': time.time() - total_start_time
|
| 681 |
+
}
|
| 682 |
+
result['processing_time'] = time.time() - total_start_time
|
| 683 |
+
|
| 684 |
+
return result
|
| 685 |
+
|
| 686 |
+
timing_breakdown['similarity_check'] = time.time() - phase_start
|
| 687 |
+
phase_start = time.time()
|
| 688 |
+
|
| 689 |
+
# 3. Process the frame as normal
|
| 690 |
+
if backend not in self.backends:
|
| 691 |
+
logger.warning(f"Backend {backend} not supported, using mediapipe")
|
| 692 |
+
backend = 'mediapipe'
|
| 693 |
+
|
| 694 |
+
# Call the appropriate backend function
|
| 695 |
+
result = self.backends[backend](frame, frame_index)
|
| 696 |
+
|
| 697 |
+
# Get face detection and emotion analysis timing from backend result
|
| 698 |
+
backend_timing = result.pop('timing_breakdown', {})
|
| 699 |
+
timing_breakdown['face_detection'] = backend_timing.get('face_detection', 0)
|
| 700 |
+
timing_breakdown['emotion_analysis'] = backend_timing.get('emotion_analysis', 0)
|
| 701 |
+
|
| 702 |
+
phase_start = time.time()
|
| 703 |
+
|
| 704 |
+
# Apply temporal consistency check
|
| 705 |
+
if 'faces' in result:
|
| 706 |
+
result['faces'] = self._check_temporal_consistency(result['faces'], frame.shape)
|
| 707 |
+
|
| 708 |
+
# If we have faces and center face priority is enabled, add main face info
|
| 709 |
+
if self.center_face_priority and result['faces']:
|
| 710 |
+
# The first face should be the center face after _check_temporal_consistency
|
| 711 |
+
main_face = result['faces'][0]
|
| 712 |
+
result['main_face'] = main_face
|
| 713 |
+
|
| 714 |
+
# Add confidence score for the main face
|
| 715 |
+
if 'emotion' in main_face:
|
| 716 |
+
# Use the stability-checked emotion if available
|
| 717 |
+
if 'dominant_emotion' in main_face and 'emotion_confidence' in main_face:
|
| 718 |
+
result['main_emotion'] = {
|
| 719 |
+
'emotion': main_face['dominant_emotion'],
|
| 720 |
+
'confidence': main_face['emotion_confidence'],
|
| 721 |
+
'stable': main_face.get('emotion_stable', False)
|
| 722 |
+
}
|
| 723 |
+
else:
|
| 724 |
+
# Fall back to simple max if stability check wasn't run
|
| 725 |
+
dominant_emotion = max(main_face['emotion'].items(), key=lambda x: x[1])
|
| 726 |
+
result['main_emotion'] = {
|
| 727 |
+
'emotion': dominant_emotion[0],
|
| 728 |
+
'confidence': dominant_emotion[1]
|
| 729 |
+
}
|
| 730 |
+
|
| 731 |
+
timing_breakdown['temporal_consistency'] = time.time() - phase_start
|
| 732 |
+
phase_start = time.time()
|
| 733 |
+
|
| 734 |
+
# Add device information
|
| 735 |
+
result['device_used'] = DEVICE
|
| 736 |
+
|
| 737 |
+
# Add detailed timing information
|
| 738 |
+
timing_breakdown['misc_processing'] = time.time() - phase_start
|
| 739 |
+
timing_breakdown['total'] = time.time() - total_start_time
|
| 740 |
+
result['timing_breakdown'] = timing_breakdown
|
| 741 |
+
|
| 742 |
+
# Update total processing time to include all steps
|
| 743 |
+
result['processing_time'] = timing_breakdown['total']
|
| 744 |
+
|
| 745 |
+
# Cache the result if caching is enabled
|
| 746 |
+
if self.enable_cache:
|
| 747 |
+
cache_key = f"{frame_hash}_{backend}"
|
| 748 |
+
self.frame_cache[cache_key] = result
|
| 749 |
+
|
| 750 |
+
# Store last frame and result for similarity check
|
| 751 |
+
self.last_frame = frame.copy()
|
| 752 |
+
self.last_frame_result = result
|
| 753 |
+
|
| 754 |
+
return result
|
| 755 |
+
|
| 756 |
+
def _analyze_opencv(self, frame: np.ndarray, frame_index: int) -> Dict[str, Any]:
|
| 757 |
+
"""
|
| 758 |
+
Analyze emotions using OpenCV backend.
|
| 759 |
+
|
| 760 |
+
Args:
|
| 761 |
+
frame: Video frame as numpy array
|
| 762 |
+
frame_index: Index of the frame
|
| 763 |
+
|
| 764 |
+
Returns:
|
| 765 |
+
Dictionary with analysis results
|
| 766 |
+
"""
|
| 767 |
+
start_time = time.time()
|
| 768 |
+
|
| 769 |
+
try:
|
| 770 |
+
# Convert to grayscale for face detection
|
| 771 |
+
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 772 |
+
|
| 773 |
+
# Load OpenCV face detector
|
| 774 |
+
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
|
| 775 |
+
|
| 776 |
+
# Detect faces
|
| 777 |
+
faces = face_cascade.detectMultiScale(gray, 1.1, 4)
|
| 778 |
+
|
| 779 |
+
# If no faces detected, return empty result
|
| 780 |
+
if len(faces) == 0:
|
| 781 |
+
return {
|
| 782 |
+
'frame_index': frame_index,
|
| 783 |
+
'faces': [],
|
| 784 |
+
'gpu_used': False,
|
| 785 |
+
'framework': 'opencv',
|
| 786 |
+
'processing_time': time.time() - start_time
|
| 787 |
+
}
|
| 788 |
+
|
| 789 |
+
# Get image dimensions for center calculation
|
| 790 |
+
ih, iw, _ = frame.shape
|
| 791 |
+
img_center_x = iw / 2
|
| 792 |
+
img_center_y = ih / 2
|
| 793 |
+
|
| 794 |
+
# Process each face
|
| 795 |
+
face_results = []
|
| 796 |
+
for (x, y, w, h) in faces:
|
| 797 |
+
# Validate face
|
| 798 |
+
if not self._is_valid_face([x, y, w, h], frame.shape):
|
| 799 |
+
continue
|
| 800 |
+
|
| 801 |
+
# Calculate center of face and distance to image center
|
| 802 |
+
face_center_x = x + w / 2
|
| 803 |
+
face_center_y = y + h / 2
|
| 804 |
+
center_distance = np.sqrt((face_center_x - img_center_x)**2 + (face_center_y - img_center_y)**2)
|
| 805 |
+
center_distance_ratio = center_distance / np.sqrt(iw**2 + ih**2)
|
| 806 |
+
|
| 807 |
+
face_img = frame[y:y+h, x:x+w]
|
| 808 |
+
|
| 809 |
+
# Analyze emotions with DeepFace
|
| 810 |
+
try:
|
| 811 |
+
emotion_result = DeepFace.analyze(
|
| 812 |
+
face_img,
|
| 813 |
+
actions=['emotion'],
|
| 814 |
+
enforce_detection=False,
|
| 815 |
+
silent=True
|
| 816 |
+
)
|
| 817 |
+
|
| 818 |
+
# Extract emotion scores
|
| 819 |
+
if isinstance(emotion_result, list):
|
| 820 |
+
emotion_scores = emotion_result[0]['emotion']
|
| 821 |
+
else:
|
| 822 |
+
emotion_scores = emotion_result['emotion']
|
| 823 |
+
|
| 824 |
+
face_results.append({
|
| 825 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 826 |
+
'emotion': emotion_scores,
|
| 827 |
+
'center_distance': float(center_distance),
|
| 828 |
+
'center_distance_ratio': float(center_distance_ratio)
|
| 829 |
+
})
|
| 830 |
+
except Exception as e:
|
| 831 |
+
logger.warning(f"Error analyzing face: {e}")
|
| 832 |
+
|
| 833 |
+
return {
|
| 834 |
+
'frame_index': frame_index,
|
| 835 |
+
'faces': face_results,
|
| 836 |
+
'gpu_used': False,
|
| 837 |
+
'framework': 'opencv',
|
| 838 |
+
'processing_time': time.time() - start_time
|
| 839 |
+
}
|
| 840 |
+
|
| 841 |
+
except Exception as e:
|
| 842 |
+
logger.error(f"Error in OpenCV analysis: {e}")
|
| 843 |
+
return {
|
| 844 |
+
'frame_index': frame_index,
|
| 845 |
+
'faces': [],
|
| 846 |
+
'error': str(e),
|
| 847 |
+
'gpu_used': False,
|
| 848 |
+
'framework': 'opencv',
|
| 849 |
+
'processing_time': time.time() - start_time
|
| 850 |
+
}
|
| 851 |
+
|
| 852 |
+
def _analyze_mediapipe(self, frame: np.ndarray, frame_index: int) -> Dict[str, Any]:
|
| 853 |
+
"""
|
| 854 |
+
Optimized MediaPipe-based face and emotion analysis with batch processing.
|
| 855 |
+
"""
|
| 856 |
+
start_time = time.time()
|
| 857 |
+
|
| 858 |
+
# Initialize timing breakdown
|
| 859 |
+
timing_breakdown = {
|
| 860 |
+
'face_detection': 0,
|
| 861 |
+
'emotion_analysis': 0,
|
| 862 |
+
'preprocessing': 0,
|
| 863 |
+
'postprocessing': 0
|
| 864 |
+
}
|
| 865 |
+
|
| 866 |
+
try:
|
| 867 |
+
# Track preprocessing time
|
| 868 |
+
preprocess_start = time.time()
|
| 869 |
+
|
| 870 |
+
# Basic preprocessing for primary detection
|
| 871 |
+
rgb_frame = self._preprocess_frame(frame)
|
| 872 |
+
rgb_frame.flags.writeable = False
|
| 873 |
+
|
| 874 |
+
timing_breakdown['preprocessing'] = time.time() - preprocess_start
|
| 875 |
+
|
| 876 |
+
# Track face detection time
|
| 877 |
+
detection_start = time.time()
|
| 878 |
+
|
| 879 |
+
# Run face detection
|
| 880 |
+
detection_results = self.face_detection.process(rgb_frame)
|
| 881 |
+
rgb_frame.flags.writeable = True
|
| 882 |
+
|
| 883 |
+
# If no faces detected, try backup method with enhanced preprocessing
|
| 884 |
+
if not detection_results.detections:
|
| 885 |
+
enhanced_frame = self._enhanced_preprocess_frame(frame)
|
| 886 |
+
gray = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2GRAY)
|
| 887 |
+
faces = self.face_cascade.detectMultiScale(
|
| 888 |
+
gray,
|
| 889 |
+
scaleFactor=1.1,
|
| 890 |
+
minNeighbors=4,
|
| 891 |
+
minSize=(30, 30),
|
| 892 |
+
flags=cv2.CASCADE_SCALE_IMAGE
|
| 893 |
+
)
|
| 894 |
+
|
| 895 |
+
if len(faces) > 0:
|
| 896 |
+
detection_results.detections = []
|
| 897 |
+
for (x, y, w, h) in faces:
|
| 898 |
+
relative_bbox = mp.solutions.face_detection.Detection()
|
| 899 |
+
relative_bbox.location_data.relative_bounding_box.xmin = x / frame.shape[1]
|
| 900 |
+
relative_bbox.location_data.relative_bounding_box.ymin = y / frame.shape[0]
|
| 901 |
+
relative_bbox.location_data.relative_bounding_box.width = w / frame.shape[1]
|
| 902 |
+
relative_bbox.location_data.relative_bounding_box.height = h / frame.shape[0]
|
| 903 |
+
relative_bbox.score = [0.5]
|
| 904 |
+
detection_results.detections.append(relative_bbox)
|
| 905 |
+
|
| 906 |
+
timing_breakdown['face_detection'] = time.time() - detection_start
|
| 907 |
+
|
| 908 |
+
# Process detections
|
| 909 |
+
face_results = []
|
| 910 |
+
face_rois = []
|
| 911 |
+
face_positions = []
|
| 912 |
+
|
| 913 |
+
# Track emotion analysis time
|
| 914 |
+
emotion_start = time.time()
|
| 915 |
+
|
| 916 |
+
if detection_results.detections:
|
| 917 |
+
ih, iw = frame.shape[:2]
|
| 918 |
+
|
| 919 |
+
for detection in detection_results.detections:
|
| 920 |
+
bbox = detection.location_data.relative_bounding_box
|
| 921 |
+
x = max(0, int(bbox.xmin * iw))
|
| 922 |
+
y = max(0, int(bbox.ymin * ih))
|
| 923 |
+
w = min(int(bbox.width * iw), iw - x)
|
| 924 |
+
h = min(int(bbox.height * ih), ih - y)
|
| 925 |
+
|
| 926 |
+
if w <= 0 or h <= 0:
|
| 927 |
+
continue
|
| 928 |
+
|
| 929 |
+
# Calculate face center and distance
|
| 930 |
+
face_center_x = x + w/2
|
| 931 |
+
face_center_y = y + h/2
|
| 932 |
+
img_center_x = iw/2
|
| 933 |
+
img_center_y = ih/2
|
| 934 |
+
center_distance = np.sqrt((face_center_x - img_center_x)**2 +
|
| 935 |
+
(face_center_y - img_center_y)**2)
|
| 936 |
+
|
| 937 |
+
# Extract face ROI
|
| 938 |
+
face_roi = frame[y:y+h, x:x+w]
|
| 939 |
+
|
| 940 |
+
# Check if face is valid
|
| 941 |
+
if face_roi.size == 0:
|
| 942 |
+
continue
|
| 943 |
+
|
| 944 |
+
# Generate a hash for this face for caching
|
| 945 |
+
if self.enable_cache and self.face_cache is not None:
|
| 946 |
+
small_face = cv2.resize(face_roi, (32, 32))
|
| 947 |
+
face_hash = hashlib.md5(small_face.tobytes()).hexdigest()
|
| 948 |
+
|
| 949 |
+
# Check if we've already analyzed this face
|
| 950 |
+
cached_emotion = self.emotion_cache.get(face_hash)
|
| 951 |
+
if cached_emotion is not None:
|
| 952 |
+
face_results.append({
|
| 953 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 954 |
+
'emotion': cached_emotion,
|
| 955 |
+
'detection_confidence': float(detection.score[0]),
|
| 956 |
+
'center_distance': float(center_distance),
|
| 957 |
+
'center_distance_ratio': float(center_distance / np.sqrt(iw**2 + ih**2)),
|
| 958 |
+
'from_cache': True
|
| 959 |
+
})
|
| 960 |
+
continue
|
| 961 |
+
|
| 962 |
+
# Store face ROI for batch processing
|
| 963 |
+
face_rois.append(face_roi)
|
| 964 |
+
face_positions.append((x, y, w, h, detection.score[0], center_distance, face_hash if self.enable_cache else None))
|
| 965 |
+
|
| 966 |
+
# Process faces in batches if multiple faces detected
|
| 967 |
+
if face_rois:
|
| 968 |
+
# Determine if we should use batched or individual processing
|
| 969 |
+
use_batching = self.cuda_available and len(face_rois) > 1 and len(face_rois) <= self.batch_size
|
| 970 |
+
|
| 971 |
+
if use_batching:
|
| 972 |
+
# Batch process faces
|
| 973 |
+
batch_results = self._batch_process_emotions(face_rois)
|
| 974 |
+
|
| 975 |
+
# Create face results from batch results
|
| 976 |
+
for i, (emotion_scores, (x, y, w, h, confidence, distance, face_hash)) in enumerate(zip(batch_results, face_positions)):
|
| 977 |
+
# Cache this result if caching is enabled
|
| 978 |
+
if self.enable_cache and face_hash is not None:
|
| 979 |
+
self.emotion_cache[face_hash] = emotion_scores
|
| 980 |
+
|
| 981 |
+
face_results.append({
|
| 982 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 983 |
+
'emotion': emotion_scores,
|
| 984 |
+
'detection_confidence': float(confidence),
|
| 985 |
+
'center_distance': float(distance),
|
| 986 |
+
'center_distance_ratio': float(distance / np.sqrt(iw**2 + ih**2)),
|
| 987 |
+
'batched': True
|
| 988 |
+
})
|
| 989 |
+
else:
|
| 990 |
+
# Process each face individually
|
| 991 |
+
for i, face_roi in enumerate(face_rois):
|
| 992 |
+
x, y, w, h, confidence, distance, face_hash = face_positions[i]
|
| 993 |
+
|
| 994 |
+
try:
|
| 995 |
+
# Analyze emotions with optimized settings
|
| 996 |
+
emotion_result = DeepFace.analyze(
|
| 997 |
+
face_roi,
|
| 998 |
+
actions=['emotion'],
|
| 999 |
+
enforce_detection=False,
|
| 1000 |
+
silent=True,
|
| 1001 |
+
detector_backend='skip' # Skip detection since we already have the face
|
| 1002 |
+
)
|
| 1003 |
+
|
| 1004 |
+
emotion_scores = emotion_result[0]['emotion'] if isinstance(emotion_result, list) else emotion_result['emotion']
|
| 1005 |
+
|
| 1006 |
+
# Cache this result if caching is enabled
|
| 1007 |
+
if self.enable_cache and face_hash is not None:
|
| 1008 |
+
self.emotion_cache[face_hash] = emotion_scores
|
| 1009 |
+
|
| 1010 |
+
face_results.append({
|
| 1011 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 1012 |
+
'emotion': emotion_scores,
|
| 1013 |
+
'detection_confidence': float(confidence),
|
| 1014 |
+
'center_distance': float(distance),
|
| 1015 |
+
'center_distance_ratio': float(distance / np.sqrt(iw**2 + ih**2))
|
| 1016 |
+
})
|
| 1017 |
+
except Exception as e:
|
| 1018 |
+
logger.warning(f"Error analyzing face emotions: {e}")
|
| 1019 |
+
|
| 1020 |
+
timing_breakdown['emotion_analysis'] = time.time() - emotion_start
|
| 1021 |
+
|
| 1022 |
+
# Track postprocessing time
|
| 1023 |
+
postprocess_start = time.time()
|
| 1024 |
+
|
| 1025 |
+
total_time = time.time() - start_time
|
| 1026 |
+
timing_breakdown['postprocessing'] = time.time() - postprocess_start
|
| 1027 |
+
timing_breakdown['total'] = total_time
|
| 1028 |
+
|
| 1029 |
+
return {
|
| 1030 |
+
'frame_index': frame_index,
|
| 1031 |
+
'faces': face_results,
|
| 1032 |
+
'gpu_used': self.cuda_available,
|
| 1033 |
+
'framework': 'mediapipe',
|
| 1034 |
+
'processing_time': total_time,
|
| 1035 |
+
'timing_breakdown': timing_breakdown
|
| 1036 |
+
}
|
| 1037 |
+
|
| 1038 |
+
except Exception as e:
|
| 1039 |
+
logger.error(f"Error in MediaPipe analysis: {e}")
|
| 1040 |
+
return {
|
| 1041 |
+
'frame_index': frame_index,
|
| 1042 |
+
'faces': [],
|
| 1043 |
+
'error': str(e),
|
| 1044 |
+
'gpu_used': False,
|
| 1045 |
+
'framework': 'mediapipe',
|
| 1046 |
+
'processing_time': time.time() - start_time
|
| 1047 |
+
}
|
| 1048 |
+
|
| 1049 |
+
def _analyze_mtcnn(self, frame: np.ndarray, frame_index: int) -> Dict[str, Any]:
|
| 1050 |
+
"""
|
| 1051 |
+
Analyze emotions using MTCNN backend.
|
| 1052 |
+
|
| 1053 |
+
Args:
|
| 1054 |
+
frame: Video frame as numpy array
|
| 1055 |
+
frame_index: Index of the frame
|
| 1056 |
+
|
| 1057 |
+
Returns:
|
| 1058 |
+
Dictionary with analysis results
|
| 1059 |
+
"""
|
| 1060 |
+
start_time = time.time()
|
| 1061 |
+
|
| 1062 |
+
try:
|
| 1063 |
+
# Analyze with DeepFace using MTCNN backend
|
| 1064 |
+
results = DeepFace.analyze(
|
| 1065 |
+
frame,
|
| 1066 |
+
actions=['emotion'],
|
| 1067 |
+
detector_backend='mtcnn',
|
| 1068 |
+
enforce_detection=False,
|
| 1069 |
+
silent=True
|
| 1070 |
+
)
|
| 1071 |
+
|
| 1072 |
+
# Process results
|
| 1073 |
+
face_results = []
|
| 1074 |
+
|
| 1075 |
+
if isinstance(results, list):
|
| 1076 |
+
for result in results:
|
| 1077 |
+
region = result.get('region', {})
|
| 1078 |
+
x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('w', 0), region.get('h', 0)
|
| 1079 |
+
confidence = result.get('confidence', 0)
|
| 1080 |
+
|
| 1081 |
+
# Validate face with confidence
|
| 1082 |
+
if not self._is_valid_face([x, y, w, h], frame.shape, confidence):
|
| 1083 |
+
continue
|
| 1084 |
+
|
| 1085 |
+
face_results.append({
|
| 1086 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 1087 |
+
'emotion': result.get('emotion', {})
|
| 1088 |
+
})
|
| 1089 |
+
else:
|
| 1090 |
+
region = results.get('region', {})
|
| 1091 |
+
x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('w', 0), region.get('h', 0)
|
| 1092 |
+
confidence = results.get('confidence', 0)
|
| 1093 |
+
|
| 1094 |
+
# Validate face with confidence
|
| 1095 |
+
if self._is_valid_face([x, y, w, h], frame.shape, confidence):
|
| 1096 |
+
face_results.append({
|
| 1097 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 1098 |
+
'emotion': results.get('emotion', {})
|
| 1099 |
+
})
|
| 1100 |
+
|
| 1101 |
+
return {
|
| 1102 |
+
'frame_index': frame_index,
|
| 1103 |
+
'faces': face_results,
|
| 1104 |
+
'gpu_used': True, # MTCNN can use GPU
|
| 1105 |
+
'framework': 'mtcnn',
|
| 1106 |
+
'processing_time': time.time() - start_time
|
| 1107 |
+
}
|
| 1108 |
+
|
| 1109 |
+
except Exception as e:
|
| 1110 |
+
logger.error(f"Error in MTCNN analysis: {e}")
|
| 1111 |
+
return {
|
| 1112 |
+
'frame_index': frame_index,
|
| 1113 |
+
'faces': [],
|
| 1114 |
+
'error': str(e),
|
| 1115 |
+
'gpu_used': True,
|
| 1116 |
+
'framework': 'mtcnn',
|
| 1117 |
+
'processing_time': time.time() - start_time
|
| 1118 |
+
}
|
| 1119 |
+
|
| 1120 |
+
def _analyze_ssd(self, frame: np.ndarray, frame_index: int) -> Dict[str, Any]:
|
| 1121 |
+
"""
|
| 1122 |
+
Analyze emotions using SSD backend.
|
| 1123 |
+
|
| 1124 |
+
Args:
|
| 1125 |
+
frame: Video frame as numpy array
|
| 1126 |
+
frame_index: Index of the frame
|
| 1127 |
+
|
| 1128 |
+
Returns:
|
| 1129 |
+
Dictionary with analysis results
|
| 1130 |
+
"""
|
| 1131 |
+
start_time = time.time()
|
| 1132 |
+
|
| 1133 |
+
try:
|
| 1134 |
+
# Get image dimensions for center calculation
|
| 1135 |
+
ih, iw, _ = frame.shape
|
| 1136 |
+
img_center_x = iw / 2
|
| 1137 |
+
img_center_y = ih / 2
|
| 1138 |
+
|
| 1139 |
+
# Analyze with DeepFace using SSD backend
|
| 1140 |
+
results = DeepFace.analyze(
|
| 1141 |
+
frame,
|
| 1142 |
+
actions=['emotion'],
|
| 1143 |
+
detector_backend='ssd',
|
| 1144 |
+
enforce_detection=False,
|
| 1145 |
+
silent=True
|
| 1146 |
+
)
|
| 1147 |
+
|
| 1148 |
+
# Log results for debugging
|
| 1149 |
+
logger.info(f"SSD Raw results type: {type(results)}")
|
| 1150 |
+
if isinstance(results, list):
|
| 1151 |
+
logger.info(f"SSD Raw results length: {len(results)}")
|
| 1152 |
+
if results:
|
| 1153 |
+
logger.info(f"SSD First result keys: {results[0].keys()}")
|
| 1154 |
+
|
| 1155 |
+
# Process results
|
| 1156 |
+
face_results = []
|
| 1157 |
+
|
| 1158 |
+
if isinstance(results, list):
|
| 1159 |
+
logger.info(f"Processing list of results with length: {len(results)}")
|
| 1160 |
+
for result in results:
|
| 1161 |
+
region = result.get('region', {})
|
| 1162 |
+
x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('w', 0), region.get('h', 0)
|
| 1163 |
+
|
| 1164 |
+
# Get confidence from face_confidence if available, otherwise use 0.7 as default
|
| 1165 |
+
confidence = result.get('face_confidence', result.get('confidence', 0.7))
|
| 1166 |
+
|
| 1167 |
+
logger.info(f"Face detected at [{x}, {y}, {w}, {h}] with confidence {confidence}")
|
| 1168 |
+
|
| 1169 |
+
# Validate face with confidence
|
| 1170 |
+
if not self._is_valid_face([x, y, w, h], frame.shape, confidence):
|
| 1171 |
+
logger.info(f"Face validation failed for face at [{x}, {y}, {w}, {h}]")
|
| 1172 |
+
continue
|
| 1173 |
+
|
| 1174 |
+
# Calculate center of face and distance to image center
|
| 1175 |
+
face_center_x = x + w / 2
|
| 1176 |
+
face_center_y = y + h / 2
|
| 1177 |
+
center_distance = np.sqrt((face_center_x - img_center_x)**2 + (face_center_y - img_center_y)**2)
|
| 1178 |
+
center_distance_ratio = center_distance / np.sqrt(iw**2 + ih**2)
|
| 1179 |
+
|
| 1180 |
+
face_results.append({
|
| 1181 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 1182 |
+
'emotion': result.get('emotion', {}),
|
| 1183 |
+
'detection_confidence': float(confidence),
|
| 1184 |
+
'center_distance': float(center_distance),
|
| 1185 |
+
'center_distance_ratio': float(center_distance_ratio)
|
| 1186 |
+
})
|
| 1187 |
+
else:
|
| 1188 |
+
region = results.get('region', {})
|
| 1189 |
+
x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('w', 0), region.get('h', 0)
|
| 1190 |
+
|
| 1191 |
+
# Get confidence from face_confidence if available, otherwise use 0.7 as default
|
| 1192 |
+
confidence = results.get('face_confidence', results.get('confidence', 0.7))
|
| 1193 |
+
|
| 1194 |
+
logger.info(f"Face detected at [{x}, {y}, {w}, {h}] with confidence {confidence}")
|
| 1195 |
+
|
| 1196 |
+
# Validate face with confidence
|
| 1197 |
+
if self._is_valid_face([x, y, w, h], frame.shape, confidence):
|
| 1198 |
+
# Calculate center of face and distance to image center
|
| 1199 |
+
face_center_x = x + w / 2
|
| 1200 |
+
face_center_y = y + h / 2
|
| 1201 |
+
center_distance = np.sqrt((face_center_x - img_center_x)**2 + (face_center_y - img_center_y)**2)
|
| 1202 |
+
center_distance_ratio = center_distance / np.sqrt(iw**2 + ih**2)
|
| 1203 |
+
|
| 1204 |
+
face_results.append({
|
| 1205 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 1206 |
+
'emotion': results.get('emotion', {}),
|
| 1207 |
+
'detection_confidence': float(confidence),
|
| 1208 |
+
'center_distance': float(center_distance),
|
| 1209 |
+
'center_distance_ratio': float(center_distance_ratio)
|
| 1210 |
+
})
|
| 1211 |
+
else:
|
| 1212 |
+
logger.info(f"Face validation failed for face at [{x}, {y}, {w}, {h}]")
|
| 1213 |
+
|
| 1214 |
+
logger.info(f"Final face_results length: {len(face_results)}")
|
| 1215 |
+
|
| 1216 |
+
return {
|
| 1217 |
+
'frame_index': frame_index,
|
| 1218 |
+
'faces': face_results,
|
| 1219 |
+
'gpu_used': False, # Set to False as GPU usage is determined by DeepFace
|
| 1220 |
+
'framework': 'ssd',
|
| 1221 |
+
'processing_time': time.time() - start_time
|
| 1222 |
+
}
|
| 1223 |
+
|
| 1224 |
+
except Exception as e:
|
| 1225 |
+
logger.error(f"Error in SSD analysis: {e}")
|
| 1226 |
+
return {
|
| 1227 |
+
'frame_index': frame_index,
|
| 1228 |
+
'faces': [],
|
| 1229 |
+
'error': str(e),
|
| 1230 |
+
'gpu_used': False,
|
| 1231 |
+
'framework': 'ssd',
|
| 1232 |
+
'processing_time': time.time() - start_time
|
| 1233 |
+
}
|
| 1234 |
+
|
| 1235 |
+
def _analyze_retinaface(self, frame: np.ndarray, frame_index: int) -> Dict[str, Any]:
|
| 1236 |
+
"""
|
| 1237 |
+
Analyze emotions using RetinaFace backend.
|
| 1238 |
+
|
| 1239 |
+
Args:
|
| 1240 |
+
frame: Video frame as numpy array
|
| 1241 |
+
frame_index: Index of the frame
|
| 1242 |
+
|
| 1243 |
+
Returns:
|
| 1244 |
+
Dictionary with analysis results
|
| 1245 |
+
"""
|
| 1246 |
+
start_time = time.time()
|
| 1247 |
+
|
| 1248 |
+
try:
|
| 1249 |
+
# Analyze with DeepFace using RetinaFace backend
|
| 1250 |
+
results = DeepFace.analyze(
|
| 1251 |
+
frame,
|
| 1252 |
+
actions=['emotion'],
|
| 1253 |
+
detector_backend='retinaface',
|
| 1254 |
+
enforce_detection=False,
|
| 1255 |
+
silent=True
|
| 1256 |
+
)
|
| 1257 |
+
|
| 1258 |
+
# Process results
|
| 1259 |
+
face_results = []
|
| 1260 |
+
|
| 1261 |
+
if isinstance(results, list):
|
| 1262 |
+
for result in results:
|
| 1263 |
+
region = result.get('region', {})
|
| 1264 |
+
x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('w', 0), region.get('h', 0)
|
| 1265 |
+
confidence = result.get('confidence', 0)
|
| 1266 |
+
|
| 1267 |
+
# Validate face with confidence
|
| 1268 |
+
if not self._is_valid_face([x, y, w, h], frame.shape, confidence):
|
| 1269 |
+
continue
|
| 1270 |
+
|
| 1271 |
+
face_results.append({
|
| 1272 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 1273 |
+
'emotion': result.get('emotion', {})
|
| 1274 |
+
})
|
| 1275 |
+
else:
|
| 1276 |
+
region = results.get('region', {})
|
| 1277 |
+
x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('w', 0), region.get('h', 0)
|
| 1278 |
+
confidence = results.get('confidence', 0)
|
| 1279 |
+
|
| 1280 |
+
# Validate face with confidence
|
| 1281 |
+
if self._is_valid_face([x, y, w, h], frame.shape, confidence):
|
| 1282 |
+
face_results.append({
|
| 1283 |
+
'face_box': [int(x), int(y), int(w), int(h)],
|
| 1284 |
+
'emotion': results.get('emotion', {})
|
| 1285 |
+
})
|
| 1286 |
+
|
| 1287 |
+
return {
|
| 1288 |
+
'frame_index': frame_index,
|
| 1289 |
+
'faces': face_results,
|
| 1290 |
+
'gpu_used': False, # RetinaFace doesn't use GPU efficiently
|
| 1291 |
+
'framework': 'retinaface',
|
| 1292 |
+
'processing_time': time.time() - start_time
|
| 1293 |
+
}
|
| 1294 |
+
|
| 1295 |
+
except Exception as e:
|
| 1296 |
+
logger.error(f"Error in RetinaFace analysis: {e}")
|
| 1297 |
+
return {
|
| 1298 |
+
'frame_index': frame_index,
|
| 1299 |
+
'faces': [],
|
| 1300 |
+
'error': str(e),
|
| 1301 |
+
'gpu_used': False,
|
| 1302 |
+
'framework': 'retinaface',
|
| 1303 |
+
'processing_time': time.time() - start_time
|
| 1304 |
+
}
|
| 1305 |
+
|
| 1306 |
+
@time_it
|
| 1307 |
+
def annotate_frame(self, frame: np.ndarray, results: Dict[str, Any]) -> np.ndarray:
|
| 1308 |
+
"""
|
| 1309 |
+
Annotate a frame with emotion analysis results.
|
| 1310 |
+
|
| 1311 |
+
Args:
|
| 1312 |
+
frame: Video frame as numpy array
|
| 1313 |
+
results: Emotion analysis results
|
| 1314 |
+
|
| 1315 |
+
Returns:
|
| 1316 |
+
Annotated frame
|
| 1317 |
+
"""
|
| 1318 |
+
annotated_frame = frame.copy()
|
| 1319 |
+
|
| 1320 |
+
# Draw faces and emotions
|
| 1321 |
+
for face in results.get('faces', []):
|
| 1322 |
+
face_box = face.get('face_box')
|
| 1323 |
+
if not face_box:
|
| 1324 |
+
continue
|
| 1325 |
+
|
| 1326 |
+
x, y, w, h = face_box
|
| 1327 |
+
|
| 1328 |
+
# Draw rectangle around face
|
| 1329 |
+
cv2.rectangle(annotated_frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
|
| 1330 |
+
|
| 1331 |
+
# Get dominant emotion
|
| 1332 |
+
emotions = face.get('emotion', {})
|
| 1333 |
+
if not emotions:
|
| 1334 |
+
continue
|
| 1335 |
+
|
| 1336 |
+
dominant_emotion = max(emotions.items(), key=lambda x: x[1])[0]
|
| 1337 |
+
dominant_score = emotions[dominant_emotion]
|
| 1338 |
+
|
| 1339 |
+
# Draw emotion label
|
| 1340 |
+
label = f"{dominant_emotion}: {dominant_score:.2f}"
|
| 1341 |
+
cv2.putText(annotated_frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
|
| 1342 |
+
|
| 1343 |
+
return annotated_frame
|
| 1344 |
+
|
| 1345 |
+
@time_it
|
| 1346 |
+
def process_video_frames(
|
| 1347 |
+
self,
|
| 1348 |
+
video_path: str,
|
| 1349 |
+
frame_rate: int = 1,
|
| 1350 |
+
backend: str = 'mediapipe',
|
| 1351 |
+
generate_annotated_video: bool = False,
|
| 1352 |
+
status_callback = None,
|
| 1353 |
+
adaptive_sampling: bool = True,
|
| 1354 |
+
max_frames: int = 3000
|
| 1355 |
+
) -> Tuple[List[Dict[str, Any]], Optional[str], Dict[str, Any], Dict[str, Any]]:
|
| 1356 |
+
"""
|
| 1357 |
+
Process video frames for emotion analysis with adaptive sampling.
|
| 1358 |
+
|
| 1359 |
+
Args:
|
| 1360 |
+
video_path: Path to the video file
|
| 1361 |
+
frame_rate: Frame rate for processing (process every N frames)
|
| 1362 |
+
backend: Backend to use for face detection
|
| 1363 |
+
generate_annotated_video: Whether to generate an annotated video
|
| 1364 |
+
status_callback: Optional callback function to report progress
|
| 1365 |
+
adaptive_sampling: Whether to use adaptive frame sampling based on content
|
| 1366 |
+
max_frames: Maximum number of frames to process to prevent memory issues
|
| 1367 |
+
|
| 1368 |
+
Returns:
|
| 1369 |
+
A tuple containing:
|
| 1370 |
+
- results: List of dictionaries containing analysis results for each processed frame
|
| 1371 |
+
- annotated_video_path: Path to the annotated video if generated, None otherwise
|
| 1372 |
+
- timing_summary: Dictionary with summarized execution time statistics
|
| 1373 |
+
- metadata: Dictionary with detailed processing metadata and statistics
|
| 1374 |
+
|
| 1375 |
+
The timing_summary dictionary contains:
|
| 1376 |
+
- total_time: Total execution time in seconds
|
| 1377 |
+
- frame_processing_time: Time spent processing frames in seconds
|
| 1378 |
+
- avg_time_per_frame: Average time per frame in seconds
|
| 1379 |
+
- frames_processed: Number of frames processed
|
| 1380 |
+
- frames_from_cache: Number of frames retrieved from cache
|
| 1381 |
+
- frames_similar: Number of frames identified as similar to previous frames
|
| 1382 |
+
- avg_face_detection_time: Average time spent on face detection per frame
|
| 1383 |
+
- avg_emotion_analysis_time: Average time spent on emotion analysis per frame
|
| 1384 |
+
- cache_hit_rate: Cache hit rate as a percentage
|
| 1385 |
+
|
| 1386 |
+
The metadata dictionary contains detailed statistics about the processing:
|
| 1387 |
+
- timing_stats: Detailed timing statistics for each phase
|
| 1388 |
+
- detailed_timing: Average timing for each processing component
|
| 1389 |
+
- cache_stats: Cache hit/miss statistics
|
| 1390 |
+
- gpu_usage: GPU usage percentage
|
| 1391 |
+
- backend: Backend used for face detection
|
| 1392 |
+
- device: Device used for processing (CPU, CUDA, MPS)
|
| 1393 |
+
- frames_processed: Number of frames processed
|
| 1394 |
+
- total_frames: Total number of frames in the video
|
| 1395 |
+
- frame_rate: Processing frame rate (may differ from video frame rate)
|
| 1396 |
+
- adaptive_sampling: Whether adaptive sampling was used
|
| 1397 |
+
"""
|
| 1398 |
+
process_start_time = time.time()
|
| 1399 |
+
|
| 1400 |
+
# Initialize timing statistics
|
| 1401 |
+
timing_stats = {
|
| 1402 |
+
'video_loading': 0,
|
| 1403 |
+
'frame_processing': 0,
|
| 1404 |
+
'face_detection': 0,
|
| 1405 |
+
'emotion_analysis': 0,
|
| 1406 |
+
'temporal_consistency': 0,
|
| 1407 |
+
'annotation': 0,
|
| 1408 |
+
'video_saving': 0,
|
| 1409 |
+
'total': 0
|
| 1410 |
+
}
|
| 1411 |
+
|
| 1412 |
+
phase_start = time.time()
|
| 1413 |
+
|
| 1414 |
+
logger.info(f"Processing video: {video_path}")
|
| 1415 |
+
logger.info(f"Using backend: {backend}")
|
| 1416 |
+
logger.info(f"Using device: {DEVICE}")
|
| 1417 |
+
|
| 1418 |
+
# Open video
|
| 1419 |
+
cap = cv2.VideoCapture(video_path)
|
| 1420 |
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 1421 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 1422 |
+
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 1423 |
+
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 1424 |
+
|
| 1425 |
+
logger.info(f"Total frames in video: {total_frames}")
|
| 1426 |
+
|
| 1427 |
+
timing_stats['video_loading'] = time.time() - phase_start
|
| 1428 |
+
phase_start = time.time()
|
| 1429 |
+
|
| 1430 |
+
# Calculate memory requirements and adjust max_frames if needed
|
| 1431 |
+
frame_size_bytes = width * height * 3 # RGB image
|
| 1432 |
+
estimated_memory_per_frame = frame_size_bytes * 0.8 # Drastically reduced from 1.5 to 0.8
|
| 1433 |
+
|
| 1434 |
+
# Get available memory
|
| 1435 |
+
try:
|
| 1436 |
+
import psutil
|
| 1437 |
+
available_memory = psutil.virtual_memory().available
|
| 1438 |
+
|
| 1439 |
+
# Debug print memory info
|
| 1440 |
+
logger.info(f"Available memory: {available_memory / (1024*1024):.2f} MB")
|
| 1441 |
+
logger.info(f"Estimated memory per frame: {estimated_memory_per_frame / (1024*1024):.2f} MB")
|
| 1442 |
+
|
| 1443 |
+
# Calculate how many frames we can safely process - increase memory percentage to 0.9
|
| 1444 |
+
safe_max_frames = int(available_memory * 0.9 / estimated_memory_per_frame) # Increased to 0.9
|
| 1445 |
+
|
| 1446 |
+
# Force a minimum of 750 frames to match test behavior - even if memory check would result in fewer
|
| 1447 |
+
if safe_max_frames < 750:
|
| 1448 |
+
logger.warning(f"Memory constraints would limit to {safe_max_frames} frames, forcing minimum of 750 frames")
|
| 1449 |
+
safe_max_frames = 750
|
| 1450 |
+
|
| 1451 |
+
# Adjust max_frames if needed
|
| 1452 |
+
if safe_max_frames < max_frames:
|
| 1453 |
+
logger.warning(f"Adjusting max_frames from {max_frames} to {safe_max_frames} due to memory constraints")
|
| 1454 |
+
max_frames = safe_max_frames
|
| 1455 |
+
except Exception as e:
|
| 1456 |
+
logger.warning(f"Could not check system memory, using default max_frames: {str(e)}")
|
| 1457 |
+
# Force 750 frames minimum even if memory check fails
|
| 1458 |
+
max_frames = max(max_frames, 750)
|
| 1459 |
+
|
| 1460 |
+
# FORCE minimum 750 frames regardless of memory constraints to match test behavior
|
| 1461 |
+
max_frames = max(max_frames, 750)
|
| 1462 |
+
logger.info(f"Will process up to {max_frames} frames")
|
| 1463 |
+
|
| 1464 |
+
# Calculate adaptive frame rate if enabled
|
| 1465 |
+
if adaptive_sampling:
|
| 1466 |
+
# For short videos, process more frames
|
| 1467 |
+
if total_frames <= 600: # 10 minutes at 60fps
|
| 1468 |
+
adaptive_rate = 1
|
| 1469 |
+
# For medium videos, process every other frame
|
| 1470 |
+
elif total_frames <= 3600: # 1 hour at 60fps
|
| 1471 |
+
adaptive_rate = 2
|
| 1472 |
+
# For longer videos, sample more aggressively
|
| 1473 |
+
else:
|
| 1474 |
+
# Scale based on video length, but cap at reasonable values
|
| 1475 |
+
adaptive_rate = min(10, max(3, int(total_frames / 1800)))
|
| 1476 |
+
|
| 1477 |
+
# Override provided frame_rate with adaptive one
|
| 1478 |
+
logger.info(f"Using adaptive frame rate: {adaptive_rate} (1 frame every {adaptive_rate} frames)")
|
| 1479 |
+
frame_rate = adaptive_rate
|
| 1480 |
+
|
| 1481 |
+
# Prepare for annotated video if requested
|
| 1482 |
+
annotated_video_path = None
|
| 1483 |
+
video_writer = None
|
| 1484 |
+
|
| 1485 |
+
if generate_annotated_video:
|
| 1486 |
+
# Create a directory for annotated videos if it doesn't exist
|
| 1487 |
+
annotated_dir = Path("annotated_videos")
|
| 1488 |
+
annotated_dir.mkdir(exist_ok=True)
|
| 1489 |
+
|
| 1490 |
+
# Generate a filename for the annotated video
|
| 1491 |
+
video_filename = Path(video_path).stem
|
| 1492 |
+
annotated_video_path = str(annotated_dir / f"{video_filename}_annotated.mp4")
|
| 1493 |
+
|
| 1494 |
+
# Create VideoWriter
|
| 1495 |
+
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 1496 |
+
video_writer = cv2.VideoWriter(annotated_video_path, fourcc, fps, (width, height))
|
| 1497 |
+
|
| 1498 |
+
# Process frames
|
| 1499 |
+
results = []
|
| 1500 |
+
processed_count = 0
|
| 1501 |
+
gpu_usage_stats = {"frames_processed": 0, "gpu_used_frames": 0, "framework_used": None}
|
| 1502 |
+
total_processing_time = 0
|
| 1503 |
+
frame_processing_times = []
|
| 1504 |
+
|
| 1505 |
+
# Detailed timing statistics for analysis phases
|
| 1506 |
+
detailed_timing = {
|
| 1507 |
+
'face_detection': [],
|
| 1508 |
+
'emotion_analysis': [],
|
| 1509 |
+
'temporal_consistency': [],
|
| 1510 |
+
'cache_check': [],
|
| 1511 |
+
'similarity_check': [],
|
| 1512 |
+
'total_per_frame': []
|
| 1513 |
+
}
|
| 1514 |
+
|
| 1515 |
+
# Track frames from cache vs computed
|
| 1516 |
+
cache_stats = {
|
| 1517 |
+
'frames_from_cache': 0,
|
| 1518 |
+
'frames_computed': 0,
|
| 1519 |
+
'frames_similar': 0
|
| 1520 |
+
}
|
| 1521 |
+
|
| 1522 |
+
# Reset face tracking for a new video
|
| 1523 |
+
self.face_history = []
|
| 1524 |
+
self.frame_count = 0
|
| 1525 |
+
|
| 1526 |
+
# If caching is enabled, clear caches before processing
|
| 1527 |
+
if self.enable_cache:
|
| 1528 |
+
self.frame_cache = LRUCache(maxsize=self.frame_cache.maxsize)
|
| 1529 |
+
self.emotion_cache = LRUCache(maxsize=self.emotion_cache.maxsize)
|
| 1530 |
+
self.face_cache = LRUCache(maxsize=self.face_cache.maxsize)
|
| 1531 |
+
|
| 1532 |
+
# Track similar frames for adaptive processing
|
| 1533 |
+
last_processed_idx = -1
|
| 1534 |
+
consecutive_similar_frames = 0
|
| 1535 |
+
|
| 1536 |
+
frame_processing_start = time.time()
|
| 1537 |
+
|
| 1538 |
+
for frame_count in range(0, min(total_frames, max_frames)):
|
| 1539 |
+
ret, frame = cap.read()
|
| 1540 |
+
if not ret:
|
| 1541 |
+
break
|
| 1542 |
+
|
| 1543 |
+
# Only process this frame if:
|
| 1544 |
+
# 1. It's at the right interval based on frame_rate
|
| 1545 |
+
# 2. We haven't exceeded our processing budget
|
| 1546 |
+
process_this_frame = frame_count % frame_rate == 0
|
| 1547 |
+
|
| 1548 |
+
# With adaptive sampling, we might skip frames if they're similar to previous ones
|
| 1549 |
+
# Disable all similarity checks regardless of self.skip_similar_frames setting
|
| 1550 |
+
if False and process_this_frame and self.skip_similar_frames and last_processed_idx >= 0:
|
| 1551 |
+
# Only check similarity if we've processed some frames already
|
| 1552 |
+
if frame_count - last_processed_idx < 30: # Only check recent frames
|
| 1553 |
+
# Compute frame similarity
|
| 1554 |
+
current_small = cv2.resize(frame, (32, 32))
|
| 1555 |
+
gray_current = cv2.cvtColor(current_small, cv2.COLOR_BGR2GRAY)
|
| 1556 |
+
|
| 1557 |
+
if hasattr(self, 'last_processed_frame_small'):
|
| 1558 |
+
# Calculate difference
|
| 1559 |
+
diff = cv2.absdiff(gray_current, self.last_processed_frame_small)
|
| 1560 |
+
mean_diff = np.mean(diff)
|
| 1561 |
+
|
| 1562 |
+
# If very similar, consider skipping
|
| 1563 |
+
if mean_diff < 5.0: # Threshold for similarity
|
| 1564 |
+
consecutive_similar_frames += 1
|
| 1565 |
+
|
| 1566 |
+
# Skip if we've seen several similar frames
|
| 1567 |
+
# but ensure we still process at least one frame every 10
|
| 1568 |
+
if consecutive_similar_frames > 3 and (frame_count - last_processed_idx) < 10:
|
| 1569 |
+
process_this_frame = False
|
| 1570 |
+
else:
|
| 1571 |
+
consecutive_similar_frames = 0
|
| 1572 |
+
|
| 1573 |
+
# Save current frame for next comparison
|
| 1574 |
+
self.last_processed_frame_small = gray_current
|
| 1575 |
+
|
| 1576 |
+
if process_this_frame:
|
| 1577 |
+
logger.info(f"Processing frame {frame_count}/{total_frames} ({frame_count/total_frames*100:.1f}%)")
|
| 1578 |
+
last_processed_idx = frame_count
|
| 1579 |
+
|
| 1580 |
+
# Analyze frame
|
| 1581 |
+
frame_start_time = time.time()
|
| 1582 |
+
result = self.analyze_frame(frame, frame_count, backend)
|
| 1583 |
+
frame_end_time = time.time()
|
| 1584 |
+
|
| 1585 |
+
# Track performance
|
| 1586 |
+
processing_time = result.get('processing_time', 0)
|
| 1587 |
+
total_processing_time += processing_time
|
| 1588 |
+
frame_processing_times.append(processing_time)
|
| 1589 |
+
|
| 1590 |
+
# Capture detailed timing information from the result
|
| 1591 |
+
if 'timing_breakdown' in result:
|
| 1592 |
+
timing = result['timing_breakdown']
|
| 1593 |
+
detailed_timing['face_detection'].append(timing.get('face_detection', 0))
|
| 1594 |
+
detailed_timing['emotion_analysis'].append(timing.get('emotion_analysis', 0))
|
| 1595 |
+
detailed_timing['temporal_consistency'].append(timing.get('temporal_consistency', 0))
|
| 1596 |
+
detailed_timing['cache_check'].append(timing.get('cache_check', 0))
|
| 1597 |
+
detailed_timing['similarity_check'].append(timing.get('similarity_check', 0))
|
| 1598 |
+
detailed_timing['total_per_frame'].append(timing.get('total', processing_time))
|
| 1599 |
+
|
| 1600 |
+
# Track cache vs computed frames
|
| 1601 |
+
if result.get('from_cache', False):
|
| 1602 |
+
cache_stats['frames_from_cache'] += 1
|
| 1603 |
+
elif result.get('similar_to_previous', False):
|
| 1604 |
+
cache_stats['frames_similar'] += 1
|
| 1605 |
+
else:
|
| 1606 |
+
cache_stats['frames_computed'] += 1
|
| 1607 |
+
|
| 1608 |
+
# Track GPU usage for statistics
|
| 1609 |
+
if result:
|
| 1610 |
+
gpu_usage_stats["frames_processed"] += 1
|
| 1611 |
+
if result.get("gpu_used", False):
|
| 1612 |
+
gpu_usage_stats["gpu_used_frames"] += 1
|
| 1613 |
+
gpu_usage_stats["framework_used"] = result.get("framework", "Unknown")
|
| 1614 |
+
|
| 1615 |
+
if result:
|
| 1616 |
+
results.append(result)
|
| 1617 |
+
processed_count += 1
|
| 1618 |
+
|
| 1619 |
+
# Generate annotated frame if requested
|
| 1620 |
+
if generate_annotated_video and video_writer is not None:
|
| 1621 |
+
annotation_start = time.time()
|
| 1622 |
+
annotated_frame = self.annotate_frame(frame, result)
|
| 1623 |
+
video_writer.write(annotated_frame)
|
| 1624 |
+
timing_stats['annotation'] += time.time() - annotation_start
|
| 1625 |
+
elif generate_annotated_video and video_writer is not None:
|
| 1626 |
+
# Write original frame to annotated video
|
| 1627 |
+
annotation_start = time.time()
|
| 1628 |
+
video_writer.write(frame)
|
| 1629 |
+
timing_stats['annotation'] += time.time() - annotation_start
|
| 1630 |
+
|
| 1631 |
+
# Update progress periodically
|
| 1632 |
+
# Call status_callback more frequently, e.g., every frame or every few frames
|
| 1633 |
+
if status_callback and frame_count % 2 == 0: # Update every 2 frames
|
| 1634 |
+
# This phase (emotion frame analysis) should cover from 0% to 100% of ITS OWN progress.
|
| 1635 |
+
# The calling function (video_processor.process_video) will scale this to an overall progress range.
|
| 1636 |
+
current_phase_progress = (frame_count / min(total_frames, max_frames)) * 100
|
| 1637 |
+
status_callback(current_phase_progress)
|
| 1638 |
+
|
| 1639 |
+
# Ensure a final progress update for this phase if the loop didn't catch the last bit
|
| 1640 |
+
if status_callback:
|
| 1641 |
+
status_callback(100) # Signal 100% completion of this specific phase
|
| 1642 |
+
|
| 1643 |
+
timing_stats['frame_processing'] = time.time() - frame_processing_start
|
| 1644 |
+
video_saving_start = time.time()
|
| 1645 |
+
|
| 1646 |
+
# Release resources
|
| 1647 |
+
cap.release()
|
| 1648 |
+
if video_writer is not None:
|
| 1649 |
+
video_writer.release()
|
| 1650 |
+
|
| 1651 |
+
timing_stats['video_saving'] = time.time() - video_saving_start
|
| 1652 |
+
|
| 1653 |
+
# Calculate aggregate timing statistics
|
| 1654 |
+
if detailed_timing['face_detection']:
|
| 1655 |
+
timing_stats['face_detection'] = sum(detailed_timing['face_detection'])
|
| 1656 |
+
timing_stats['emotion_analysis'] = sum(detailed_timing['emotion_analysis'])
|
| 1657 |
+
timing_stats['temporal_consistency'] = sum(detailed_timing['temporal_consistency'])
|
| 1658 |
+
|
| 1659 |
+
# Log GPU usage
|
| 1660 |
+
if gpu_usage_stats["frames_processed"] > 0:
|
| 1661 |
+
gpu_percentage = (gpu_usage_stats["gpu_used_frames"] / gpu_usage_stats["frames_processed"]) * 100
|
| 1662 |
+
logger.info(f"GPU usage: {gpu_percentage:.2f}% of frames")
|
| 1663 |
+
logger.info(f"Framework used: {gpu_usage_stats['framework_used']}")
|
| 1664 |
+
|
| 1665 |
+
# Calculate average times
|
| 1666 |
+
mean_values = {}
|
| 1667 |
+
for key, values in detailed_timing.items():
|
| 1668 |
+
if values:
|
| 1669 |
+
mean_values[key] = sum(values) / len(values)
|
| 1670 |
+
else:
|
| 1671 |
+
mean_values[key] = 0
|
| 1672 |
+
|
| 1673 |
+
# Log performance statistics
|
| 1674 |
+
avg_time = total_processing_time / len(frame_processing_times) if frame_processing_times else 0
|
| 1675 |
+
logger.info(f"Processed {processed_count} frames in {total_processing_time:.2f} seconds (avg {avg_time:.4f} sec/frame)")
|
| 1676 |
+
logger.info(f"Frame sources: {cache_stats['frames_computed']} computed, {cache_stats['frames_from_cache']} from cache, {cache_stats['frames_similar']} similar frames")
|
| 1677 |
+
|
| 1678 |
+
# Log detailed timing information
|
| 1679 |
+
logger.info(f"Average time breakdown per frame (seconds):")
|
| 1680 |
+
logger.info(f" - Face detection: {mean_values.get('face_detection', 0):.4f}")
|
| 1681 |
+
logger.info(f" - Emotion analysis: {mean_values.get('emotion_analysis', 0):.4f}")
|
| 1682 |
+
logger.info(f" - Temporal consistency: {mean_values.get('temporal_consistency', 0):.4f}")
|
| 1683 |
+
logger.info(f" - Cache check: {mean_values.get('cache_check', 0):.4f}")
|
| 1684 |
+
logger.info(f" - Similarity check: {mean_values.get('similarity_check', 0):.4f}")
|
| 1685 |
+
|
| 1686 |
+
# Add device information to the results
|
| 1687 |
+
for result in results:
|
| 1688 |
+
result['device_used'] = DEVICE
|
| 1689 |
+
|
| 1690 |
+
# If caching was enabled, log statistics
|
| 1691 |
+
if self.enable_cache:
|
| 1692 |
+
frame_cache_stats = self.frame_cache.get_stats()
|
| 1693 |
+
emotion_cache_stats = self.emotion_cache.get_stats()
|
| 1694 |
+
logger.info(f"Frame cache: {frame_cache_stats['hit_rate']:.2f}% hit rate ({frame_cache_stats['hits']} hits, {frame_cache_stats['misses']} misses)")
|
| 1695 |
+
logger.info(f"Emotion cache: {emotion_cache_stats['hit_rate']:.2f}% hit rate ({emotion_cache_stats['hits']} hits, {emotion_cache_stats['misses']} misses)")
|
| 1696 |
+
|
| 1697 |
+
# Calculate and log total execution time
|
| 1698 |
+
timing_stats['total'] = time.time() - process_start_time
|
| 1699 |
+
logger.info(f"Total execution time: {timing_stats['total']:.2f} seconds")
|
| 1700 |
+
logger.info(f" - Video loading: {timing_stats['video_loading']:.2f}s ({(timing_stats['video_loading']/timing_stats['total']*100):.1f}%)")
|
| 1701 |
+
logger.info(f" - Frame processing: {timing_stats['frame_processing']:.2f}s ({(timing_stats['frame_processing']/timing_stats['total']*100):.1f}%)")
|
| 1702 |
+
if generate_annotated_video:
|
| 1703 |
+
logger.info(f" - Video annotation: {timing_stats['annotation']:.2f}s ({(timing_stats['annotation']/timing_stats['total']*100):.1f}%)")
|
| 1704 |
+
logger.info(f" - Video saving: {timing_stats['video_saving']:.2f}s ({(timing_stats['video_saving']/timing_stats['total']*100):.1f}%)")
|
| 1705 |
+
|
| 1706 |
+
# Add overall timing stats to return value
|
| 1707 |
+
timing_summary = {
|
| 1708 |
+
'total_time': timing_stats['total'],
|
| 1709 |
+
'frame_processing_time': timing_stats['frame_processing'],
|
| 1710 |
+
'avg_time_per_frame': avg_time,
|
| 1711 |
+
'frames_processed': processed_count,
|
| 1712 |
+
'frames_from_cache': cache_stats['frames_from_cache'],
|
| 1713 |
+
'frames_similar': cache_stats['frames_similar'],
|
| 1714 |
+
'avg_face_detection_time': mean_values.get('face_detection', 0),
|
| 1715 |
+
'avg_emotion_analysis_time': mean_values.get('emotion_analysis', 0),
|
| 1716 |
+
'cache_hit_rate': frame_cache_stats['hit_rate'] if self.enable_cache else 0
|
| 1717 |
+
}
|
| 1718 |
+
|
| 1719 |
+
# Create a metadata object to return with the results
|
| 1720 |
+
metadata = {
|
| 1721 |
+
'timing_stats': timing_stats,
|
| 1722 |
+
'detailed_timing': mean_values,
|
| 1723 |
+
'cache_stats': cache_stats if self.enable_cache else None,
|
| 1724 |
+
'gpu_usage': gpu_percentage if gpu_usage_stats["frames_processed"] > 0 else 0,
|
| 1725 |
+
'backend': backend,
|
| 1726 |
+
'device': DEVICE,
|
| 1727 |
+
'frames_processed': processed_count,
|
| 1728 |
+
'total_frames': total_frames,
|
| 1729 |
+
'frame_rate': frame_rate,
|
| 1730 |
+
'adaptive_sampling': adaptive_sampling
|
| 1731 |
+
}
|
| 1732 |
+
|
| 1733 |
+
return results, annotated_video_path, timing_summary, metadata
|
behavior_backend/app/services/processing/eye_contact_analyzer.py
ADDED
|
@@ -0,0 +1,1739 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import mediapipe as mp
|
| 3 |
+
import numpy as np
|
| 4 |
+
import time
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from collections import deque
|
| 7 |
+
from app.utils.device_utils import get_available_device
|
| 8 |
+
import json
|
| 9 |
+
import argparse
|
| 10 |
+
|
| 11 |
+
# Import LLM libraries - updated to use langchain_community
|
| 12 |
+
from langchain_openai import ChatOpenAI
|
| 13 |
+
from langchain.prompts import ChatPromptTemplate
|
| 14 |
+
from langchain.schema import HumanMessage, SystemMessage
|
| 15 |
+
from langchain_anthropic import ChatAnthropic
|
| 16 |
+
from langchain_groq import ChatGroq
|
| 17 |
+
|
| 18 |
+
# Initialize device once at module level
|
| 19 |
+
DEVICE = get_available_device()
|
| 20 |
+
|
| 21 |
+
class EyeContactAnalyzer:
|
| 22 |
+
def __init__(self, history_size=100, gaze_threshold=0.15, ear_threshold=0.21,
|
| 23 |
+
blink_threshold=0.17, blink_consec_frames=1, max_blink_duration=0.4,
|
| 24 |
+
ear_drop_threshold=0.035, use_adaptive_blink_threshold=True,
|
| 25 |
+
use_ear_drop_detection=True, fps=30):
|
| 26 |
+
"""
|
| 27 |
+
Initialize the eye contact analyzer.
|
| 28 |
+
|
| 29 |
+
Args:
|
| 30 |
+
history_size: Number of frames to keep in history for rolling metrics
|
| 31 |
+
gaze_threshold: Threshold for determining eye contact based on normalized gaze deviation
|
| 32 |
+
ear_threshold: Threshold for determining if eyes are open based on eye aspect ratio
|
| 33 |
+
blink_threshold: Threshold for determining blink based on eye aspect ratio
|
| 34 |
+
blink_consec_frames: Minimum consecutive frames below blink_threshold to count as a blink
|
| 35 |
+
max_blink_duration: Maximum duration in seconds to consider as a blink (longer is closed eyes)
|
| 36 |
+
ear_drop_threshold: Minimum drop in EAR value to consider as a blink start
|
| 37 |
+
use_adaptive_blink_threshold: Whether to use adaptive thresholding for blink detection
|
| 38 |
+
use_ear_drop_detection: Whether to use EAR drop detection method instead of fixed threshold
|
| 39 |
+
fps: Frames per second of the video being analyzed (used for accurate duration calculation)
|
| 40 |
+
"""
|
| 41 |
+
# Initialize MediaPipe Face Mesh
|
| 42 |
+
self.mp_face_mesh = mp.solutions.face_mesh
|
| 43 |
+
self.face_mesh = self.mp_face_mesh.FaceMesh(
|
| 44 |
+
max_num_faces=1,
|
| 45 |
+
refine_landmarks=True,
|
| 46 |
+
min_detection_confidence=0.5,
|
| 47 |
+
min_tracking_confidence=0.5
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
# MediaPipe landmark indices
|
| 51 |
+
self.LEFT_IRIS = [474, 475, 476, 477]
|
| 52 |
+
self.RIGHT_IRIS = [469, 470, 471, 472]
|
| 53 |
+
self.LEFT_EYE = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
|
| 54 |
+
self.RIGHT_EYE = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
|
| 55 |
+
|
| 56 |
+
# Thresholds
|
| 57 |
+
self.gaze_threshold = gaze_threshold
|
| 58 |
+
self.ear_threshold = ear_threshold
|
| 59 |
+
self.blink_threshold = blink_threshold
|
| 60 |
+
self.blink_consec_frames = blink_consec_frames
|
| 61 |
+
self.max_blink_duration = max_blink_duration
|
| 62 |
+
self.use_adaptive_blink_threshold = use_adaptive_blink_threshold
|
| 63 |
+
|
| 64 |
+
# EAR drop detection
|
| 65 |
+
self.ear_drop_threshold = ear_drop_threshold
|
| 66 |
+
self.use_ear_drop_detection = use_ear_drop_detection
|
| 67 |
+
self.baseline_ear = None
|
| 68 |
+
self.baseline_calibration_frames = 50
|
| 69 |
+
self.baseline_ears = []
|
| 70 |
+
|
| 71 |
+
# Stats tracking
|
| 72 |
+
self.history_size = history_size
|
| 73 |
+
self.eye_contact_history = deque(maxlen=history_size)
|
| 74 |
+
self.gaze_deviation_history = deque(maxlen=history_size)
|
| 75 |
+
self.ear_history = deque(maxlen=history_size)
|
| 76 |
+
|
| 77 |
+
# Timing variables
|
| 78 |
+
self.eye_contact_start = None
|
| 79 |
+
self.total_frames = 0
|
| 80 |
+
self.eye_contact_frames = 0
|
| 81 |
+
self.total_eye_contact_time = 0
|
| 82 |
+
self.longest_eye_contact = 0
|
| 83 |
+
self.current_eye_contact_duration = 0
|
| 84 |
+
self.start_time = time.time()
|
| 85 |
+
|
| 86 |
+
# Contact episode tracking (for interview pattern analysis)
|
| 87 |
+
self.contact_episodes = []
|
| 88 |
+
self.current_episode_start = None
|
| 89 |
+
self.non_contact_duration = 0
|
| 90 |
+
|
| 91 |
+
# Blink tracking variables
|
| 92 |
+
self.blink_counter = 0
|
| 93 |
+
self.total_blinks = 0
|
| 94 |
+
self.blink_start_time = None
|
| 95 |
+
self.blink_durations = []
|
| 96 |
+
self.last_blink_time = None
|
| 97 |
+
self.inter_blink_intervals = []
|
| 98 |
+
self.current_blink_frame_count = 0
|
| 99 |
+
self.eye_close_frames = 0
|
| 100 |
+
self.is_blinking = False
|
| 101 |
+
self.total_blink_frames = 0 # For frame-based duration calculation
|
| 102 |
+
self.prev_ear = None # Previous frame's EAR for drop detection
|
| 103 |
+
|
| 104 |
+
# Adaptive blink threshold variables
|
| 105 |
+
self.ear_min = float('inf')
|
| 106 |
+
self.ear_max = 0
|
| 107 |
+
self.adaptive_blink_threshold = blink_threshold
|
| 108 |
+
self.calibration_frames = 0
|
| 109 |
+
self.max_calibration_frames = 30 # Use first 30 frames to calibrate
|
| 110 |
+
|
| 111 |
+
# Per-eye blink tracking
|
| 112 |
+
self.left_eye_closed = False
|
| 113 |
+
self.right_eye_closed = False
|
| 114 |
+
|
| 115 |
+
# Debug information
|
| 116 |
+
self.ear_values = deque(maxlen=100) # Store recent EAR values for visualization
|
| 117 |
+
self.left_ear_values = deque(maxlen=100) # Store left eye EAR values
|
| 118 |
+
self.right_ear_values = deque(maxlen=100) # Store right eye EAR values
|
| 119 |
+
self.ear_drops = deque(maxlen=100) # Store EAR drop values
|
| 120 |
+
self.debug_mode = False
|
| 121 |
+
|
| 122 |
+
# Store fps for duration calculations
|
| 123 |
+
self.fps = fps
|
| 124 |
+
|
| 125 |
+
# Enhanced fatigue detection
|
| 126 |
+
self.perclos_history = deque(maxlen=int(fps * 60)) # 60 seconds of PERCLOS data
|
| 127 |
+
self.blink_rate_history = deque(maxlen=int(fps * 60)) # 60 seconds of blink rate data
|
| 128 |
+
self.blinks_per_minute = 0
|
| 129 |
+
self.baseline_perclos = None
|
| 130 |
+
self.baseline_blink_duration = None
|
| 131 |
+
self.baseline_blink_rate = None
|
| 132 |
+
self.baseline_calibration_complete = False
|
| 133 |
+
self.calibration_start_time = None # Will be set to start_time when the first frame is processed
|
| 134 |
+
self.fatigue_level_history = deque(maxlen=int(fps * 5)) # 5 seconds of fatigue levels
|
| 135 |
+
self.current_fatigue_level = "Low"
|
| 136 |
+
self.fatigue_level_changed_time = 0
|
| 137 |
+
self.hysteresis_delay = 5 # Seconds to wait before downgrading fatigue level
|
| 138 |
+
|
| 139 |
+
def reset_stats(self):
|
| 140 |
+
"""Reset all statistics for a new session."""
|
| 141 |
+
self.eye_contact_history.clear()
|
| 142 |
+
self.gaze_deviation_history.clear()
|
| 143 |
+
self.ear_history.clear()
|
| 144 |
+
self.eye_contact_start = None
|
| 145 |
+
self.total_frames = 0
|
| 146 |
+
self.eye_contact_frames = 0
|
| 147 |
+
self.total_eye_contact_time = 0
|
| 148 |
+
self.longest_eye_contact = 0
|
| 149 |
+
self.current_eye_contact_duration = 0
|
| 150 |
+
self.start_time = time.time()
|
| 151 |
+
self.contact_episodes = []
|
| 152 |
+
self.current_episode_start = None
|
| 153 |
+
self.non_contact_duration = 0
|
| 154 |
+
|
| 155 |
+
# Reset blink tracking data
|
| 156 |
+
self.blink_counter = 0
|
| 157 |
+
self.total_blinks = 0
|
| 158 |
+
self.blink_start_time = None
|
| 159 |
+
self.blink_durations = []
|
| 160 |
+
self.last_blink_time = None
|
| 161 |
+
self.inter_blink_intervals = []
|
| 162 |
+
self.current_blink_frame_count = 0
|
| 163 |
+
self.eye_close_frames = 0
|
| 164 |
+
self.is_blinking = False
|
| 165 |
+
self.total_blink_frames = 0
|
| 166 |
+
|
| 167 |
+
# Reset fatigue tracking
|
| 168 |
+
self.perclos_history.clear()
|
| 169 |
+
self.blink_rate_history.clear()
|
| 170 |
+
self.blinks_per_minute = 0
|
| 171 |
+
self.baseline_perclos = None
|
| 172 |
+
self.baseline_blink_duration = None
|
| 173 |
+
self.baseline_blink_rate = None
|
| 174 |
+
self.baseline_calibration_complete = False
|
| 175 |
+
self.calibration_start_time = None
|
| 176 |
+
self.fatigue_level_history.clear()
|
| 177 |
+
self.current_fatigue_level = "Low"
|
| 178 |
+
self.fatigue_level_changed_time = 0
|
| 179 |
+
|
| 180 |
+
def eye_aspect_ratio(self, eye_points):
|
| 181 |
+
"""
|
| 182 |
+
Calculate the eye aspect ratio to determine if eyes are open.
|
| 183 |
+
|
| 184 |
+
Args:
|
| 185 |
+
eye_points: np.array of eye landmark points
|
| 186 |
+
|
| 187 |
+
Returns:
|
| 188 |
+
float: eye aspect ratio
|
| 189 |
+
"""
|
| 190 |
+
# Compute the euclidean distances between vertical eye landmarks
|
| 191 |
+
vert1 = np.linalg.norm(eye_points[1] - eye_points[13])
|
| 192 |
+
vert2 = np.linalg.norm(eye_points[3] - eye_points[11])
|
| 193 |
+
# Compute the euclidean distance between horizontal eye landmarks
|
| 194 |
+
horiz = np.linalg.norm(eye_points[0] - eye_points[8])
|
| 195 |
+
# Compute eye aspect ratio
|
| 196 |
+
return (vert1 + vert2) / (2.0 * horiz) if horiz > 0 else 0
|
| 197 |
+
|
| 198 |
+
def calculate_adaptive_threshold(self, ear):
|
| 199 |
+
"""
|
| 200 |
+
Calculate an adaptive blink threshold based on observed EAR values.
|
| 201 |
+
|
| 202 |
+
Args:
|
| 203 |
+
ear: Current eye aspect ratio
|
| 204 |
+
|
| 205 |
+
Returns:
|
| 206 |
+
float: Updated adaptive blink threshold
|
| 207 |
+
"""
|
| 208 |
+
# Update min/max observed EAR values
|
| 209 |
+
if self.calibration_frames < self.max_calibration_frames:
|
| 210 |
+
self.ear_min = min(self.ear_min, ear)
|
| 211 |
+
self.ear_max = max(self.ear_max, ear)
|
| 212 |
+
self.calibration_frames += 1
|
| 213 |
+
|
| 214 |
+
# During calibration, use a percentage of the range
|
| 215 |
+
if self.ear_max > self.ear_min:
|
| 216 |
+
# Set threshold at 30% between min and max
|
| 217 |
+
self.adaptive_blink_threshold = self.ear_min + (self.ear_max - self.ear_min) * 0.3
|
| 218 |
+
|
| 219 |
+
elif len(self.ear_history) > 10:
|
| 220 |
+
# After calibration, continue to adapt based on recent history
|
| 221 |
+
recent_ears = list(self.ear_history)[-10:]
|
| 222 |
+
if max(recent_ears) > self.ear_max:
|
| 223 |
+
self.ear_max = max(recent_ears)
|
| 224 |
+
if min(recent_ears) < self.ear_min and min(recent_ears) > 0.05: # Avoid extreme outliers
|
| 225 |
+
self.ear_min = min(recent_ears)
|
| 226 |
+
|
| 227 |
+
# Calculate dynamic threshold - adjust to be 25% of the way from min to max
|
| 228 |
+
range_size = self.ear_max - self.ear_min
|
| 229 |
+
if range_size > 0:
|
| 230 |
+
self.adaptive_blink_threshold = self.ear_min + range_size * 0.25
|
| 231 |
+
|
| 232 |
+
# Ensure threshold is reasonable
|
| 233 |
+
self.adaptive_blink_threshold = max(0.1, min(0.22, self.adaptive_blink_threshold))
|
| 234 |
+
return self.adaptive_blink_threshold
|
| 235 |
+
|
| 236 |
+
def process_frame(self, frame, annotate=False):
|
| 237 |
+
"""
|
| 238 |
+
Process a single frame to analyze eye contact.
|
| 239 |
+
|
| 240 |
+
Args:
|
| 241 |
+
frame: The video frame (BGR format)
|
| 242 |
+
annotate: Whether to draw annotations on the frame
|
| 243 |
+
|
| 244 |
+
Returns:
|
| 245 |
+
dict: Eye contact metrics for this frame
|
| 246 |
+
frame: Annotated frame if annotate=True, otherwise original frame
|
| 247 |
+
"""
|
| 248 |
+
# Keep track of the frame number and calculate video time
|
| 249 |
+
self.total_frames += 1
|
| 250 |
+
frame_duration = 1.0 / self.fps # Duration of one frame
|
| 251 |
+
# Calculate time based on frame count rather than wall clock
|
| 252 |
+
current_time = self.start_time + ((self.total_frames - 1) * frame_duration)
|
| 253 |
+
|
| 254 |
+
# Initialize calibration_start_time if this is the first frame
|
| 255 |
+
if self.total_frames == 1:
|
| 256 |
+
self.calibration_start_time = self.start_time
|
| 257 |
+
self.fatigue_level_changed_time = self.start_time
|
| 258 |
+
|
| 259 |
+
frame_metrics = {
|
| 260 |
+
'timestamp': current_time,
|
| 261 |
+
'frame_number': self.total_frames,
|
| 262 |
+
'eye_contact': False,
|
| 263 |
+
'gaze_deviation': 1.0,
|
| 264 |
+
'eye_aspect_ratio': 0.0,
|
| 265 |
+
'left_eye_aspect_ratio': 0.0,
|
| 266 |
+
'right_eye_aspect_ratio': 0.0,
|
| 267 |
+
'eyes_open': False,
|
| 268 |
+
'is_blinking': False,
|
| 269 |
+
'ear_drop': 0.0
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
# Convert to RGB for MediaPipe
|
| 273 |
+
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 274 |
+
h, w, _ = frame.shape
|
| 275 |
+
|
| 276 |
+
# Process the frame
|
| 277 |
+
results = self.face_mesh.process(frame_rgb)
|
| 278 |
+
|
| 279 |
+
if results.multi_face_landmarks:
|
| 280 |
+
face_landmarks = results.multi_face_landmarks[0]
|
| 281 |
+
|
| 282 |
+
# Extract eye landmarks
|
| 283 |
+
left_iris_points = np.array([[int(face_landmarks.landmark[idx].x * w),
|
| 284 |
+
int(face_landmarks.landmark[idx].y * h)]
|
| 285 |
+
for idx in self.LEFT_IRIS])
|
| 286 |
+
right_iris_points = np.array([[int(face_landmarks.landmark[idx].x * w),
|
| 287 |
+
int(face_landmarks.landmark[idx].y * h)]
|
| 288 |
+
for idx in self.RIGHT_IRIS])
|
| 289 |
+
|
| 290 |
+
left_eye_points = np.array([[int(face_landmarks.landmark[idx].x * w),
|
| 291 |
+
int(face_landmarks.landmark[idx].y * h)]
|
| 292 |
+
for idx in self.LEFT_EYE])
|
| 293 |
+
right_eye_points = np.array([[int(face_landmarks.landmark[idx].x * w),
|
| 294 |
+
int(face_landmarks.landmark[idx].y * h)]
|
| 295 |
+
for idx in self.RIGHT_EYE])
|
| 296 |
+
|
| 297 |
+
# Calculate eye centers
|
| 298 |
+
left_eye_center = left_eye_points.mean(axis=0).astype(int)
|
| 299 |
+
right_eye_center = right_eye_points.mean(axis=0).astype(int)
|
| 300 |
+
|
| 301 |
+
# Calculate iris centers
|
| 302 |
+
left_iris_center = left_iris_points.mean(axis=0).astype(int)
|
| 303 |
+
right_iris_center = right_iris_points.mean(axis=0).astype(int)
|
| 304 |
+
|
| 305 |
+
# Calculate deviation from center
|
| 306 |
+
left_deviation = np.linalg.norm(left_iris_center - left_eye_center)
|
| 307 |
+
right_deviation = np.linalg.norm(right_iris_center - right_eye_center)
|
| 308 |
+
|
| 309 |
+
# Calculate eye width for normalization
|
| 310 |
+
left_eye_width = max(np.linalg.norm(left_eye_points[0] - left_eye_points[8]), 1)
|
| 311 |
+
right_eye_width = max(np.linalg.norm(right_eye_points[0] - right_eye_points[8]), 1)
|
| 312 |
+
|
| 313 |
+
# Normalize by eye width
|
| 314 |
+
left_deviation_norm = left_deviation / left_eye_width
|
| 315 |
+
right_deviation_norm = right_deviation / right_eye_width
|
| 316 |
+
|
| 317 |
+
# Average of both eyes
|
| 318 |
+
gaze_deviation = (left_deviation_norm + right_deviation_norm) / 2
|
| 319 |
+
|
| 320 |
+
# Calculate eye aspect ratio for each eye
|
| 321 |
+
left_ear = self.eye_aspect_ratio(left_eye_points)
|
| 322 |
+
right_ear = self.eye_aspect_ratio(right_eye_points)
|
| 323 |
+
ear = (left_ear + right_ear) / 2
|
| 324 |
+
|
| 325 |
+
# Store EAR values for debug visualization
|
| 326 |
+
self.ear_values.append(ear)
|
| 327 |
+
self.left_ear_values.append(left_ear)
|
| 328 |
+
self.right_ear_values.append(right_ear)
|
| 329 |
+
|
| 330 |
+
# Update baseline EAR for drop-based detection
|
| 331 |
+
if len(self.baseline_ears) < self.baseline_calibration_frames:
|
| 332 |
+
# Still in calibration phase
|
| 333 |
+
if ear > 0.15: # Only use reasonable eye open values
|
| 334 |
+
self.baseline_ears.append(ear)
|
| 335 |
+
if len(self.baseline_ears) == self.baseline_calibration_frames:
|
| 336 |
+
# Calibration complete - calculate baseline excluding outliers
|
| 337 |
+
sorted_ears = sorted(self.baseline_ears)
|
| 338 |
+
# Use middle 60% of values to avoid outliers
|
| 339 |
+
start_idx = int(len(sorted_ears) * 0.2)
|
| 340 |
+
end_idx = int(len(sorted_ears) * 0.8)
|
| 341 |
+
self.baseline_ear = np.mean(sorted_ears[start_idx:end_idx])
|
| 342 |
+
print(f"Calibrated baseline EAR: {self.baseline_ear:.3f}")
|
| 343 |
+
|
| 344 |
+
# Calculate EAR drop if we have previous values
|
| 345 |
+
ear_drop = 0.0
|
| 346 |
+
if self.prev_ear is not None:
|
| 347 |
+
ear_drop = max(0, self.prev_ear - ear)
|
| 348 |
+
self.ear_drops.append(ear_drop)
|
| 349 |
+
frame_metrics['ear_drop'] = ear_drop
|
| 350 |
+
|
| 351 |
+
# Update previous EAR value
|
| 352 |
+
self.prev_ear = ear
|
| 353 |
+
|
| 354 |
+
# Determine the adaptive blink threshold
|
| 355 |
+
if self.use_adaptive_blink_threshold:
|
| 356 |
+
current_blink_threshold = self.calculate_adaptive_threshold(ear)
|
| 357 |
+
else:
|
| 358 |
+
current_blink_threshold = self.blink_threshold
|
| 359 |
+
|
| 360 |
+
# Blink detection approach 1: Threshold-based
|
| 361 |
+
eyes_closed = left_ear < current_blink_threshold and right_ear < current_blink_threshold
|
| 362 |
+
eyes_open = left_ear > self.ear_threshold and right_ear > self.ear_threshold
|
| 363 |
+
|
| 364 |
+
# Blink detection approach 2: EAR drop-based
|
| 365 |
+
sudden_drop = False
|
| 366 |
+
if self.use_ear_drop_detection and self.baseline_ear is not None:
|
| 367 |
+
# Detect a significant drop from baseline
|
| 368 |
+
drop_threshold = self.ear_drop_threshold * self.baseline_ear # Proportional to baseline
|
| 369 |
+
sudden_drop = ear_drop > drop_threshold and ear < (self.baseline_ear * 0.85)
|
| 370 |
+
|
| 371 |
+
# Combined blink detection approach
|
| 372 |
+
blink_detected = eyes_closed or sudden_drop
|
| 373 |
+
|
| 374 |
+
# Enhanced blink state machine
|
| 375 |
+
if not self.is_blinking:
|
| 376 |
+
if blink_detected:
|
| 377 |
+
self.eye_close_frames += 1
|
| 378 |
+
if self.eye_close_frames >= self.blink_consec_frames:
|
| 379 |
+
self.is_blinking = True
|
| 380 |
+
self.blink_start_time = current_time
|
| 381 |
+
# We don't record inter-blink interval at blink start anymore
|
| 382 |
+
else:
|
| 383 |
+
self.eye_close_frames = 0 # Reset counter if eyes reopen before consecutive frames
|
| 384 |
+
else:
|
| 385 |
+
# Already in a blink
|
| 386 |
+
if not blink_detected and eyes_open:
|
| 387 |
+
# Blink ended
|
| 388 |
+
self.is_blinking = False
|
| 389 |
+
self.total_blinks += 1
|
| 390 |
+
blink_duration = current_time - self.blink_start_time
|
| 391 |
+
self.blink_durations.append(blink_duration)
|
| 392 |
+
|
| 393 |
+
# Record inter-blink interval when blink completes
|
| 394 |
+
if self.last_blink_time is not None:
|
| 395 |
+
# Record time between the end of the last blink and the end of this blink
|
| 396 |
+
interval = current_time - self.last_blink_time
|
| 397 |
+
self.inter_blink_intervals.append(interval)
|
| 398 |
+
# Debug print to verify intervals
|
| 399 |
+
if self.debug_mode and self.total_blinks % 5 == 0: # Print every 5 blinks
|
| 400 |
+
print(f"Blink #{self.total_blinks} - Interval: {interval:.2f}s, Avg interval: {np.mean(self.inter_blink_intervals):.2f}s")
|
| 401 |
+
|
| 402 |
+
self.last_blink_time = current_time
|
| 403 |
+
self.eye_close_frames = 0
|
| 404 |
+
self.current_blink_frame_count = 0
|
| 405 |
+
self.total_blink_frames = 0
|
| 406 |
+
elif current_time - self.blink_start_time > self.max_blink_duration:
|
| 407 |
+
# Too long to be a blink (probably just closed eyes)
|
| 408 |
+
self.is_blinking = False
|
| 409 |
+
self.eye_close_frames = 0
|
| 410 |
+
self.current_blink_frame_count = 0
|
| 411 |
+
self.total_blink_frames = 0
|
| 412 |
+
else:
|
| 413 |
+
# Blink continuing
|
| 414 |
+
self.total_blink_frames += 1
|
| 415 |
+
|
| 416 |
+
# Determine eye contact
|
| 417 |
+
eyes_open = ear > self.ear_threshold
|
| 418 |
+
eye_contact = gaze_deviation < self.gaze_threshold and eyes_open
|
| 419 |
+
|
| 420 |
+
# Update metrics
|
| 421 |
+
frame_metrics['gaze_deviation'] = gaze_deviation
|
| 422 |
+
frame_metrics['eye_aspect_ratio'] = ear
|
| 423 |
+
frame_metrics['left_eye_aspect_ratio'] = left_ear
|
| 424 |
+
frame_metrics['right_eye_aspect_ratio'] = right_ear
|
| 425 |
+
frame_metrics['eyes_open'] = eyes_open
|
| 426 |
+
frame_metrics['eye_contact'] = eye_contact
|
| 427 |
+
frame_metrics['is_blinking'] = self.is_blinking
|
| 428 |
+
frame_metrics['blink_threshold'] = current_blink_threshold
|
| 429 |
+
|
| 430 |
+
# Update history
|
| 431 |
+
self.eye_contact_history.append(1 if eye_contact else 0)
|
| 432 |
+
self.gaze_deviation_history.append(gaze_deviation)
|
| 433 |
+
self.ear_history.append(ear)
|
| 434 |
+
|
| 435 |
+
# Eye contact state machine
|
| 436 |
+
if eye_contact:
|
| 437 |
+
self.eye_contact_frames += 1
|
| 438 |
+
|
| 439 |
+
# Start a new eye contact episode if not already started
|
| 440 |
+
if self.eye_contact_start is None:
|
| 441 |
+
self.eye_contact_start = current_time
|
| 442 |
+
# If this isn't the first episode, log the previous non-contact gap
|
| 443 |
+
if self.current_episode_start is not None:
|
| 444 |
+
self.contact_episodes.append({
|
| 445 |
+
'start_time': self.current_episode_start,
|
| 446 |
+
'end_time': current_time - self.non_contact_duration,
|
| 447 |
+
'duration': current_time - self.current_episode_start - self.non_contact_duration,
|
| 448 |
+
'non_contact_before': self.non_contact_duration
|
| 449 |
+
})
|
| 450 |
+
self.current_episode_start = current_time
|
| 451 |
+
self.non_contact_duration = 0
|
| 452 |
+
|
| 453 |
+
self.current_eye_contact_duration = current_time - self.eye_contact_start
|
| 454 |
+
self.longest_eye_contact = max(self.longest_eye_contact, self.current_eye_contact_duration)
|
| 455 |
+
else:
|
| 456 |
+
# If eye contact ended, finalize the episode
|
| 457 |
+
if self.eye_contact_start is not None:
|
| 458 |
+
contact_duration = current_time - self.eye_contact_start
|
| 459 |
+
self.total_eye_contact_time += contact_duration
|
| 460 |
+
self.longest_eye_contact = max(self.longest_eye_contact, contact_duration)
|
| 461 |
+
self.eye_contact_start = None
|
| 462 |
+
self.current_eye_contact_duration = 0
|
| 463 |
+
|
| 464 |
+
# Accumulate non-contact duration - now using exact frame duration
|
| 465 |
+
self.non_contact_duration += 1.0/self.fps # Precise frame duration
|
| 466 |
+
|
| 467 |
+
# Annotate frame if requested
|
| 468 |
+
if annotate:
|
| 469 |
+
# Draw iris and eye centers
|
| 470 |
+
if results.multi_face_landmarks:
|
| 471 |
+
cv2.circle(frame, tuple(left_iris_center), 3, (0, 255, 0), -1)
|
| 472 |
+
cv2.circle(frame, tuple(right_iris_center), 3, (0, 255, 0), -1)
|
| 473 |
+
cv2.circle(frame, tuple(left_eye_center), 3, (255, 0, 0), -1)
|
| 474 |
+
cv2.circle(frame, tuple(right_eye_center), 3, (255, 0, 0), -1)
|
| 475 |
+
|
| 476 |
+
# Draw contours of eyes
|
| 477 |
+
for idx in range(len(self.LEFT_EYE)):
|
| 478 |
+
next_idx = (idx + 1) % len(self.LEFT_EYE)
|
| 479 |
+
|
| 480 |
+
# Make eye contour red if blinking
|
| 481 |
+
eye_color = (0, 0, 255) if self.is_blinking else (0, 255, 255)
|
| 482 |
+
|
| 483 |
+
cv2.line(frame, tuple(left_eye_points[idx]), tuple(left_eye_points[next_idx]), eye_color, 1)
|
| 484 |
+
cv2.line(frame, tuple(right_eye_points[idx]), tuple(right_eye_points[next_idx]), eye_color, 1)
|
| 485 |
+
|
| 486 |
+
# Organize visualization into different sections with semi-transparent backgrounds
|
| 487 |
+
h, w, _ = frame.shape
|
| 488 |
+
|
| 489 |
+
# Function to add text with background
|
| 490 |
+
def put_text_with_background(img, text, position, font, font_scale, text_color, bg_color, thickness=1, bg_alpha=0.7):
|
| 491 |
+
# Get text size
|
| 492 |
+
text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
|
| 493 |
+
|
| 494 |
+
# Create overlay for semi-transparent bg
|
| 495 |
+
overlay = img.copy()
|
| 496 |
+
# Add padding to background
|
| 497 |
+
padding = 5
|
| 498 |
+
bg_rect = (position[0]-padding, position[1]-text_size[1]-padding,
|
| 499 |
+
text_size[0]+padding*2, text_size[1]+padding*2)
|
| 500 |
+
cv2.rectangle(overlay, (bg_rect[0], bg_rect[1]),
|
| 501 |
+
(bg_rect[0]+bg_rect[2], bg_rect[1]+bg_rect[3]),
|
| 502 |
+
bg_color, -1)
|
| 503 |
+
# Apply the overlay
|
| 504 |
+
cv2.addWeighted(overlay, bg_alpha, img, 1-bg_alpha, 0, img)
|
| 505 |
+
# Then draw text
|
| 506 |
+
cv2.putText(img, text, position, font, font_scale, text_color, thickness)
|
| 507 |
+
|
| 508 |
+
# ---------- Section 1: Top Left - Basic Eye Contact Info ----------
|
| 509 |
+
# Eye contact status
|
| 510 |
+
contact_text = "Eye Contact: YES" if eye_contact else "Eye Contact: NO"
|
| 511 |
+
contact_color = (0, 255, 0) if eye_contact else (0, 0, 255)
|
| 512 |
+
put_text_with_background(frame, contact_text, (20, 30), cv2.FONT_HERSHEY_SIMPLEX,
|
| 513 |
+
0.7, contact_color, (50, 50, 50), 2)
|
| 514 |
+
|
| 515 |
+
# Eye contact duration
|
| 516 |
+
put_text_with_background(frame, f"Current Duration: {self.current_eye_contact_duration:.1f}s",
|
| 517 |
+
(20, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), (50, 50, 50))
|
| 518 |
+
|
| 519 |
+
# Gaze metrics
|
| 520 |
+
put_text_with_background(frame, f"Gaze: {gaze_deviation:.3f} (Thresh: {self.gaze_threshold})",
|
| 521 |
+
(20, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), (50, 50, 50))
|
| 522 |
+
|
| 523 |
+
# ---------- Section 2: Top Right - Blink Info ----------
|
| 524 |
+
# Blink status and count
|
| 525 |
+
right_col_x = w - 280
|
| 526 |
+
blink_status = f"Blinks: {self.total_blinks}"
|
| 527 |
+
put_text_with_background(frame, blink_status, (right_col_x, 30),
|
| 528 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), (50, 50, 50))
|
| 529 |
+
|
| 530 |
+
if self.is_blinking:
|
| 531 |
+
put_text_with_background(frame, "BLINKING", (right_col_x, 60),
|
| 532 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), (50, 50, 50), 2)
|
| 533 |
+
if current_time - self.blink_start_time > 0:
|
| 534 |
+
blink_time = f"Duration: {(current_time - self.blink_start_time)*1000:.0f}ms"
|
| 535 |
+
put_text_with_background(frame, blink_time, (right_col_x, 90),
|
| 536 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), (50, 50, 50))
|
| 537 |
+
|
| 538 |
+
# Display EAR values
|
| 539 |
+
put_text_with_background(frame, f"EAR: {ear:.3f} (Threshold: {self.ear_threshold})",
|
| 540 |
+
(right_col_x, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), (50, 50, 50))
|
| 541 |
+
|
| 542 |
+
put_text_with_background(frame, f"Left EAR: {left_ear:.3f} Right EAR: {right_ear:.3f}",
|
| 543 |
+
(right_col_x, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), (50, 50, 50))
|
| 544 |
+
|
| 545 |
+
# Blink threshold display
|
| 546 |
+
threshold_color = (255, 255, 255)
|
| 547 |
+
threshold_text = f"Blink Threshold: {current_blink_threshold:.3f}"
|
| 548 |
+
threshold_text += " (adaptive)" if self.use_adaptive_blink_threshold else " (fixed)"
|
| 549 |
+
put_text_with_background(frame, threshold_text, (right_col_x, 180),
|
| 550 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.5, threshold_color, (50, 50, 50))
|
| 551 |
+
|
| 552 |
+
# Display EAR drop metrics
|
| 553 |
+
if self.baseline_ear is not None:
|
| 554 |
+
baseline_color = (0, 255, 255)
|
| 555 |
+
put_text_with_background(frame, f"Baseline EAR: {self.baseline_ear:.3f}",
|
| 556 |
+
(right_col_x, 210), cv2.FONT_HERSHEY_SIMPLEX, 0.5, baseline_color, (50, 50, 50))
|
| 557 |
+
|
| 558 |
+
if ear_drop > 0:
|
| 559 |
+
drop_color = (0, 255, 255) if ear_drop > self.ear_drop_threshold else (255, 255, 255)
|
| 560 |
+
drop_text = f"EAR Drop: {ear_drop:.3f} (Threshold: {self.ear_drop_threshold:.3f})"
|
| 561 |
+
put_text_with_background(frame, drop_text, (right_col_x, 240),
|
| 562 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.5, drop_color, (50, 50, 50))
|
| 563 |
+
|
| 564 |
+
# ---------- Section 3: Bottom Left - Fatigue and Attention ----------
|
| 565 |
+
# Display fatigue and attention indicators if we have enough data
|
| 566 |
+
if len(self.ear_history) > 20:
|
| 567 |
+
bottom_y_start = h - 150
|
| 568 |
+
|
| 569 |
+
# Calculate fatigue level
|
| 570 |
+
recent_ears = list(self.ear_history)[-20:]
|
| 571 |
+
closed_count = sum(1 for e in recent_ears if e < self.ear_threshold * 0.8)
|
| 572 |
+
current_perclos = (closed_count / len(recent_ears)) * 100
|
| 573 |
+
|
| 574 |
+
# Store in perclos history for long-term analysis
|
| 575 |
+
self.perclos_history.append(current_perclos)
|
| 576 |
+
|
| 577 |
+
# Calculate blink rate (blinks per minute)
|
| 578 |
+
if self.last_blink_time is not None and current_time - self.last_blink_time < 1.0:
|
| 579 |
+
# Just had a blink
|
| 580 |
+
self.blink_rate_history.append(1)
|
| 581 |
+
else:
|
| 582 |
+
self.blink_rate_history.append(0)
|
| 583 |
+
|
| 584 |
+
# Calculate blinks per minute based on recent history
|
| 585 |
+
if len(self.blink_rate_history) > 0:
|
| 586 |
+
self.blinks_per_minute = sum(self.blink_rate_history) * (60 / len(self.blink_rate_history))
|
| 587 |
+
|
| 588 |
+
# Calculate lid closure index - combines blink rate and duration
|
| 589 |
+
current_lid_closure_index = 0
|
| 590 |
+
if self.blink_durations and len(self.blink_durations) > 3:
|
| 591 |
+
recent_blink_duration = np.mean(self.blink_durations[-min(len(self.blink_durations), 10):])
|
| 592 |
+
current_lid_closure_index = self.blinks_per_minute * recent_blink_duration
|
| 593 |
+
|
| 594 |
+
# Personalization: Establish baseline during first minute if not done yet
|
| 595 |
+
if not self.baseline_calibration_complete and current_time - self.calibration_start_time >= 60:
|
| 596 |
+
if len(self.perclos_history) > 0:
|
| 597 |
+
self.baseline_perclos = np.mean(list(self.perclos_history))
|
| 598 |
+
if len(self.blink_durations) > 3:
|
| 599 |
+
self.baseline_blink_duration = np.mean(self.blink_durations)
|
| 600 |
+
if len(self.blink_rate_history) > 0:
|
| 601 |
+
self.baseline_blink_rate = self.blinks_per_minute
|
| 602 |
+
self.baseline_calibration_complete = True
|
| 603 |
+
print(f"Fatigue detection calibration complete - Baseline PERCLOS: {self.baseline_perclos:.1f}%, "
|
| 604 |
+
f"Baseline blink duration: {self.baseline_blink_duration:.3f}s, "
|
| 605 |
+
f"Baseline blink rate: {self.baseline_blink_rate:.1f} blinks/min")
|
| 606 |
+
|
| 607 |
+
# Time-based smoothing: Use moving averages
|
| 608 |
+
smoothed_perclos = np.mean(list(self.perclos_history)[-min(len(self.perclos_history), 30):])
|
| 609 |
+
smoothed_blink_duration = 0
|
| 610 |
+
if self.blink_durations:
|
| 611 |
+
smoothed_blink_duration = np.mean(self.blink_durations[-min(len(self.blink_durations), 10):])
|
| 612 |
+
|
| 613 |
+
# Advanced fatigue detection with personalization
|
| 614 |
+
new_fatigue_level = "Low"
|
| 615 |
+
fatigue_color = (0, 255, 0) # Green
|
| 616 |
+
|
| 617 |
+
# Use personalized thresholds if available, otherwise use defaults
|
| 618 |
+
high_perclos_threshold = 15
|
| 619 |
+
moderate_perclos_threshold = 8
|
| 620 |
+
high_blink_duration_threshold = 0.3
|
| 621 |
+
moderate_blink_duration_threshold = 0.25
|
| 622 |
+
high_blink_rate_threshold = 30
|
| 623 |
+
moderate_blink_rate_threshold = 25
|
| 624 |
+
lid_closure_high_threshold = 0.06
|
| 625 |
+
|
| 626 |
+
if self.baseline_calibration_complete:
|
| 627 |
+
# Personalized thresholds
|
| 628 |
+
high_perclos_threshold = self.baseline_perclos + 8
|
| 629 |
+
moderate_perclos_threshold = self.baseline_perclos + 4
|
| 630 |
+
|
| 631 |
+
if self.baseline_blink_duration:
|
| 632 |
+
high_blink_duration_threshold = self.baseline_blink_duration * 1.4
|
| 633 |
+
moderate_blink_duration_threshold = self.baseline_blink_duration * 1.2
|
| 634 |
+
|
| 635 |
+
if self.baseline_blink_rate:
|
| 636 |
+
high_blink_rate_threshold = self.baseline_blink_rate * 1.4
|
| 637 |
+
moderate_blink_rate_threshold = self.baseline_blink_rate * 1.2
|
| 638 |
+
|
| 639 |
+
# Determine new fatigue level based on multiple indicators
|
| 640 |
+
if (smoothed_perclos > high_perclos_threshold or
|
| 641 |
+
(self.blink_durations and smoothed_blink_duration > high_blink_duration_threshold and smoothed_perclos > 2) or
|
| 642 |
+
(self.blinks_per_minute > high_blink_rate_threshold and smoothed_perclos > 2) or
|
| 643 |
+
(current_lid_closure_index > lid_closure_high_threshold and smoothed_perclos > 1)):
|
| 644 |
+
new_fatigue_level = "High"
|
| 645 |
+
fatigue_color = (0, 0, 255) # Red
|
| 646 |
+
elif (smoothed_perclos > moderate_perclos_threshold or
|
| 647 |
+
(self.blink_durations and smoothed_blink_duration > moderate_blink_duration_threshold and smoothed_perclos > 1) or
|
| 648 |
+
(self.blinks_per_minute > moderate_blink_rate_threshold and smoothed_perclos > 1) or
|
| 649 |
+
(current_lid_closure_index > lid_closure_high_threshold * 0.8 and smoothed_perclos > 0.5)):
|
| 650 |
+
new_fatigue_level = "Moderate"
|
| 651 |
+
fatigue_color = (0, 165, 255) # Orange
|
| 652 |
+
|
| 653 |
+
# Extra safety check - if PERCLOS is extremely low (< 0.5%), don't allow High fatigue level
|
| 654 |
+
if smoothed_perclos < 0.5 and new_fatigue_level == "High":
|
| 655 |
+
new_fatigue_level = "Moderate"
|
| 656 |
+
fatigue_color = (0, 165, 255) # Orange
|
| 657 |
+
|
| 658 |
+
# Second safety check - if PERCLOS is zero, don't allow anything above Low
|
| 659 |
+
if smoothed_perclos == 0:
|
| 660 |
+
new_fatigue_level = "Low"
|
| 661 |
+
fatigue_color = (0, 255, 0) # Green
|
| 662 |
+
|
| 663 |
+
# Apply hysteresis to prevent flickering
|
| 664 |
+
if new_fatigue_level != self.current_fatigue_level:
|
| 665 |
+
# If upgrading fatigue level (e.g. Low→Moderate or Moderate→High), change immediately
|
| 666 |
+
if (new_fatigue_level == "Moderate" and self.current_fatigue_level == "Low") or \
|
| 667 |
+
(new_fatigue_level == "High" and self.current_fatigue_level != "High"):
|
| 668 |
+
self.current_fatigue_level = new_fatigue_level
|
| 669 |
+
self.fatigue_level_changed_time = current_time
|
| 670 |
+
# If downgrading fatigue level, only change if sustained for hysteresis_delay seconds
|
| 671 |
+
elif (new_fatigue_level == "Low" or new_fatigue_level == "Moderate") and \
|
| 672 |
+
current_time - self.fatigue_level_changed_time > self.hysteresis_delay:
|
| 673 |
+
self.current_fatigue_level = new_fatigue_level
|
| 674 |
+
self.fatigue_level_changed_time = current_time
|
| 675 |
+
|
| 676 |
+
# Store history of fatigue levels for stability analysis
|
| 677 |
+
self.fatigue_level_history.append(self.current_fatigue_level)
|
| 678 |
+
|
| 679 |
+
# Use the current (hysteresis-smoothed) fatigue level
|
| 680 |
+
fatigue_level = self.current_fatigue_level
|
| 681 |
+
|
| 682 |
+
# Determine attention level from gaze stability
|
| 683 |
+
recent_gaze = list(self.gaze_deviation_history)[-20:] if self.gaze_deviation_history else []
|
| 684 |
+
gaze_stability = np.std(recent_gaze) if recent_gaze else 0
|
| 685 |
+
|
| 686 |
+
# Calculate average EAR for last 20 frames
|
| 687 |
+
avg_ear = np.mean(recent_ears) if recent_ears else 0
|
| 688 |
+
|
| 689 |
+
# Show attention level - IMPROVED ALGORITHM
|
| 690 |
+
attention_level = "Focused"
|
| 691 |
+
attention_color = (0, 255, 0) # Green
|
| 692 |
+
|
| 693 |
+
# Rule 1: High gaze deviation = Distracted
|
| 694 |
+
if gaze_stability > 0.25:
|
| 695 |
+
attention_level = "Distracted"
|
| 696 |
+
attention_color = (0, 0, 255) # Red
|
| 697 |
+
# Rule 2: "Zoning Out" needs multiple indicators, not just stable gaze
|
| 698 |
+
# IMPROVED: Low gaze deviation alone doesn't mean zoning out - needs low EAR and high PERCLOS
|
| 699 |
+
elif gaze_stability < 0.03 and len(recent_gaze) > 10 and avg_ear < self.ear_threshold * 0.9 and current_perclos > 10:
|
| 700 |
+
attention_level = "Zoning Out"
|
| 701 |
+
attention_color = (0, 0, 255) # Red
|
| 702 |
+
# Rule 3: Inconsistent gaze pattern
|
| 703 |
+
elif gaze_stability > 0.15:
|
| 704 |
+
attention_level = "Inconsistent"
|
| 705 |
+
attention_color = (0, 165, 255) # Orange
|
| 706 |
+
|
| 707 |
+
# Display enhanced fatigue metrics
|
| 708 |
+
fatigue_text = f"Fatigue: {fatigue_level} (PERCLOS: {smoothed_perclos:.1f}%)"
|
| 709 |
+
put_text_with_background(frame, fatigue_text, (20, bottom_y_start),
|
| 710 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.6, fatigue_color, (50, 50, 50))
|
| 711 |
+
|
| 712 |
+
if self.baseline_calibration_complete:
|
| 713 |
+
# Add blink rate and duration info
|
| 714 |
+
blink_info = f"Blink rate: {self.blinks_per_minute:.1f}/min, Avg Dur: {smoothed_blink_duration*1000:.0f}ms"
|
| 715 |
+
put_text_with_background(frame, blink_info, (20, bottom_y_start + 30),
|
| 716 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.6, fatigue_color, (50, 50, 50))
|
| 717 |
+
|
| 718 |
+
# Display lid closure index
|
| 719 |
+
lid_color = (0, 0, 255) if current_lid_closure_index > lid_closure_high_threshold else fatigue_color
|
| 720 |
+
lid_text = f"Lid closure index: {current_lid_closure_index:.3f} (Thresh: {lid_closure_high_threshold:.3f})"
|
| 721 |
+
put_text_with_background(frame, lid_text, (20, bottom_y_start + 60),
|
| 722 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.6, lid_color, (50, 50, 50))
|
| 723 |
+
|
| 724 |
+
# Display attention status
|
| 725 |
+
attention_text = f"Attention: {attention_level}"
|
| 726 |
+
put_text_with_background(frame, attention_text, (20, bottom_y_start + 90),
|
| 727 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.6, attention_color, (50, 50, 50))
|
| 728 |
+
|
| 729 |
+
# Draw EAR graph for debugging
|
| 730 |
+
if self.debug_mode and len(self.ear_values) > 1:
|
| 731 |
+
# Create a small graph at the bottom of the frame
|
| 732 |
+
graph_height = 100
|
| 733 |
+
graph_width = 200
|
| 734 |
+
graph_x = w - graph_width - 20
|
| 735 |
+
graph_y = h - graph_height - 20
|
| 736 |
+
|
| 737 |
+
# Create background for graph
|
| 738 |
+
cv2.rectangle(frame, (graph_x, graph_y),
|
| 739 |
+
(graph_x + graph_width, graph_y + graph_height),
|
| 740 |
+
(0, 0, 0), -1)
|
| 741 |
+
|
| 742 |
+
# Draw thresholds
|
| 743 |
+
blink_y = graph_y + graph_height - int(current_blink_threshold * graph_height * 2.5)
|
| 744 |
+
ear_y = graph_y + graph_height - int(self.ear_threshold * graph_height * 2.5)
|
| 745 |
+
|
| 746 |
+
# Draw blink threshold line
|
| 747 |
+
cv2.line(frame, (graph_x, blink_y),
|
| 748 |
+
(graph_x + graph_width, blink_y),
|
| 749 |
+
(0, 255, 255), 1)
|
| 750 |
+
# Draw ear threshold line
|
| 751 |
+
cv2.line(frame, (graph_x, ear_y),
|
| 752 |
+
(graph_x + graph_width, ear_y),
|
| 753 |
+
(255, 0, 255), 1)
|
| 754 |
+
|
| 755 |
+
# Draw baseline EAR line
|
| 756 |
+
if self.baseline_ear is not None:
|
| 757 |
+
baseline_y = graph_y + graph_height - int(self.baseline_ear * graph_height * 2.5)
|
| 758 |
+
cv2.line(frame, (graph_x, baseline_y),
|
| 759 |
+
(graph_x + graph_width, baseline_y),
|
| 760 |
+
(255, 255, 0), 1)
|
| 761 |
+
|
| 762 |
+
# Draw legend
|
| 763 |
+
cv2.putText(frame, f"BT: {current_blink_threshold:.2f}",
|
| 764 |
+
(graph_x + 5, blink_y - 5),
|
| 765 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 255), 1)
|
| 766 |
+
cv2.putText(frame, f"ET: {self.ear_threshold:.2f}",
|
| 767 |
+
(graph_x + 5, ear_y - 5),
|
| 768 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 0, 255), 1)
|
| 769 |
+
if self.baseline_ear is not None:
|
| 770 |
+
cv2.putText(frame, f"BL: {self.baseline_ear:.2f}",
|
| 771 |
+
(graph_x + 5, baseline_y - 5),
|
| 772 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 0), 1)
|
| 773 |
+
|
| 774 |
+
# Draw EAR history - average
|
| 775 |
+
ear_list = list(self.ear_values)
|
| 776 |
+
for i in range(1, len(ear_list)):
|
| 777 |
+
# Scale EAR value to graph height (EAR typically 0-0.4)
|
| 778 |
+
pt1_x = graph_x + (i-1) * graph_width // len(ear_list)
|
| 779 |
+
pt1_y = graph_y + graph_height - int(ear_list[i-1] * graph_height * 2.5)
|
| 780 |
+
pt2_x = graph_x + i * graph_width // len(ear_list)
|
| 781 |
+
pt2_y = graph_y + graph_height - int(ear_list[i] * graph_height * 2.5)
|
| 782 |
+
|
| 783 |
+
cv2.line(frame, (pt1_x, pt1_y), (pt2_x, pt2_y), (0, 255, 0), 1)
|
| 784 |
+
|
| 785 |
+
# Draw left eye EAR in blue
|
| 786 |
+
left_ear_list = list(self.left_ear_values)
|
| 787 |
+
for i in range(1, len(left_ear_list)):
|
| 788 |
+
pt1_x = graph_x + (i-1) * graph_width // len(left_ear_list)
|
| 789 |
+
pt1_y = graph_y + graph_height - int(left_ear_list[i-1] * graph_height * 2.5)
|
| 790 |
+
pt2_x = graph_x + i * graph_width // len(left_ear_list)
|
| 791 |
+
pt2_y = graph_y + graph_height - int(left_ear_list[i] * graph_height * 2.5)
|
| 792 |
+
|
| 793 |
+
cv2.line(frame, (pt1_x, pt1_y), (pt2_x, pt2_y), (255, 0, 0), 1)
|
| 794 |
+
|
| 795 |
+
# Draw right eye EAR in red
|
| 796 |
+
right_ear_list = list(self.right_ear_values)
|
| 797 |
+
for i in range(1, len(right_ear_list)):
|
| 798 |
+
pt1_x = graph_x + (i-1) * graph_width // len(right_ear_list)
|
| 799 |
+
pt1_y = graph_y + graph_height - int(right_ear_list[i-1] * graph_height * 2.5)
|
| 800 |
+
pt2_x = graph_x + i * graph_width // len(right_ear_list)
|
| 801 |
+
pt2_y = graph_y + graph_height - int(right_ear_list[i] * graph_height * 2.5)
|
| 802 |
+
|
| 803 |
+
cv2.line(frame, (pt1_x, pt1_y), (pt2_x, pt2_y), (0, 0, 255), 1)
|
| 804 |
+
|
| 805 |
+
# Draw EAR drops in yellow if we have enough data
|
| 806 |
+
if len(self.ear_drops) > 1:
|
| 807 |
+
ear_drop_list = list(self.ear_drops)
|
| 808 |
+
for i in range(1, len(ear_drop_list)):
|
| 809 |
+
# Scale drop values (usually smaller) to be visible on the graph
|
| 810 |
+
drop_scale = 4.0
|
| 811 |
+
pt1_x = graph_x + (i-1) * graph_width // len(ear_drop_list)
|
| 812 |
+
pt1_y = graph_y + graph_height - int(ear_drop_list[i-1] * graph_height * drop_scale)
|
| 813 |
+
pt2_x = graph_x + i * graph_width // len(ear_drop_list)
|
| 814 |
+
pt2_y = graph_y + graph_height - int(ear_drop_list[i] * graph_height * drop_scale)
|
| 815 |
+
|
| 816 |
+
# Keep points in bounds
|
| 817 |
+
pt1_y = max(graph_y, min(pt1_y, graph_y + graph_height))
|
| 818 |
+
pt2_y = max(graph_y, min(pt2_y, graph_y + graph_height))
|
| 819 |
+
|
| 820 |
+
cv2.line(frame, (pt1_x, pt1_y), (pt2_x, pt2_y), (0, 255, 255), 1)
|
| 821 |
+
|
| 822 |
+
# Label graph
|
| 823 |
+
cv2.putText(frame, "EAR History (Avg=green, L=blue, R=red, Drops=yellow)",
|
| 824 |
+
(graph_x, graph_y - 5),
|
| 825 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
|
| 826 |
+
|
| 827 |
+
return frame_metrics, frame
|
| 828 |
+
|
| 829 |
+
|
| 830 |
+
|
| 831 |
+
def get_stats(self):
|
| 832 |
+
"""Get statistics about eye contact, blinking patterns, etc."""
|
| 833 |
+
# Calculate total duration based on processed frames and fps
|
| 834 |
+
frame_duration = 1.0 / self.fps
|
| 835 |
+
total_duration_seconds = self.total_frames * frame_duration
|
| 836 |
+
|
| 837 |
+
# Calculate eye contact percentage
|
| 838 |
+
eye_contact_percentage = 0
|
| 839 |
+
if self.total_frames > 0:
|
| 840 |
+
eye_contact_percentage = (self.eye_contact_frames / self.total_frames) * 100
|
| 841 |
+
|
| 842 |
+
# Calculate eye contact time based on percentage of total frames
|
| 843 |
+
# This ensures consistency between percentage and duration metrics
|
| 844 |
+
eye_contact_time = (eye_contact_percentage / 100) * total_duration_seconds
|
| 845 |
+
|
| 846 |
+
# Current ongoing eye contact - calculate based on frames instead of wall clock
|
| 847 |
+
current_eye_contact_duration = 0
|
| 848 |
+
if self.eye_contact_start is not None:
|
| 849 |
+
current_time = self.start_time + (self.total_frames * frame_duration)
|
| 850 |
+
current_eye_contact_duration = current_time - self.eye_contact_start
|
| 851 |
+
|
| 852 |
+
# Fix for contact episodes: Create a copy of contact episodes for calculation
|
| 853 |
+
# to avoid modifying the original list
|
| 854 |
+
contact_episodes_for_stats = self.contact_episodes.copy()
|
| 855 |
+
|
| 856 |
+
# If we're currently in eye contact, add the current episode to our stats calculation
|
| 857 |
+
current_episode = None
|
| 858 |
+
if self.eye_contact_start is not None:
|
| 859 |
+
frame_duration = 1.0 / self.fps
|
| 860 |
+
current_time = self.start_time + (self.total_frames * frame_duration)
|
| 861 |
+
current_episode = {
|
| 862 |
+
'start_time': self.eye_contact_start,
|
| 863 |
+
'end_time': current_time,
|
| 864 |
+
'duration': current_time - self.eye_contact_start,
|
| 865 |
+
'non_contact_before': self.non_contact_duration
|
| 866 |
+
}
|
| 867 |
+
contact_episodes_for_stats.append(current_episode)
|
| 868 |
+
|
| 869 |
+
# Eye contact frequency (episodes per minute)
|
| 870 |
+
contact_frequency = 0
|
| 871 |
+
if total_duration_seconds > 0:
|
| 872 |
+
contact_frequency = len(contact_episodes_for_stats) / (total_duration_seconds / 60)
|
| 873 |
+
|
| 874 |
+
# Average contact duration
|
| 875 |
+
avg_contact_duration = 0
|
| 876 |
+
if len(contact_episodes_for_stats) > 0:
|
| 877 |
+
avg_contact_duration = sum(ep['duration'] for ep in contact_episodes_for_stats) / len(contact_episodes_for_stats)
|
| 878 |
+
|
| 879 |
+
# Calculate blink metrics
|
| 880 |
+
blink_rate = 0
|
| 881 |
+
avg_blink_duration = 0
|
| 882 |
+
avg_inter_blink_interval = 0
|
| 883 |
+
blink_duration_std = 0
|
| 884 |
+
|
| 885 |
+
if total_duration_seconds > 0:
|
| 886 |
+
blink_rate = self.total_blinks / (total_duration_seconds / 60)
|
| 887 |
+
|
| 888 |
+
if self.blink_durations:
|
| 889 |
+
avg_blink_duration = np.mean(self.blink_durations)
|
| 890 |
+
blink_duration_std = np.std(self.blink_durations)
|
| 891 |
+
|
| 892 |
+
# Fix for inter-blink interval calculation
|
| 893 |
+
# First check if we have recorded intervals
|
| 894 |
+
if self.inter_blink_intervals:
|
| 895 |
+
avg_inter_blink_interval = np.mean(self.inter_blink_intervals)
|
| 896 |
+
else:
|
| 897 |
+
# If no recorded intervals but we have blinks, calculate based on total time
|
| 898 |
+
if self.total_blinks > 1:
|
| 899 |
+
# Estimate average interval as total duration divided by (blinks-1)
|
| 900 |
+
avg_inter_blink_interval = total_duration_seconds / (self.total_blinks - 1)
|
| 901 |
+
|
| 902 |
+
# Calculate gaze metrics
|
| 903 |
+
gaze_deviation_mean = 0
|
| 904 |
+
gaze_deviation_std = 0
|
| 905 |
+
gaze_deviation_history = list(self.gaze_deviation_history)
|
| 906 |
+
|
| 907 |
+
if gaze_deviation_history:
|
| 908 |
+
gaze_deviation_mean = np.mean(gaze_deviation_history)
|
| 909 |
+
gaze_deviation_std = np.std(gaze_deviation_history)
|
| 910 |
+
|
| 911 |
+
# Calculate EAR metrics
|
| 912 |
+
ear_mean = 0
|
| 913 |
+
ear_std = 0
|
| 914 |
+
ear_history = list(self.ear_history)
|
| 915 |
+
|
| 916 |
+
if ear_history:
|
| 917 |
+
ear_mean = np.mean(ear_history)
|
| 918 |
+
ear_std = np.std(ear_history)
|
| 919 |
+
|
| 920 |
+
# Enhanced fatigue metrics with personalized baselines
|
| 921 |
+
perclos_percentage = 0
|
| 922 |
+
if self.ear_history:
|
| 923 |
+
closed_frames = sum(1 for ear in self.ear_history if ear < self.ear_threshold * 0.8)
|
| 924 |
+
perclos_percentage = (closed_frames / len(self.ear_history)) * 100
|
| 925 |
+
|
| 926 |
+
# Smoothed perclos using all available data
|
| 927 |
+
smoothed_perclos = perclos_percentage
|
| 928 |
+
if len(self.perclos_history) > 0:
|
| 929 |
+
smoothed_perclos = np.mean(list(self.perclos_history))
|
| 930 |
+
|
| 931 |
+
# Calculate blink rate from the full history
|
| 932 |
+
full_blink_rate = self.blinks_per_minute
|
| 933 |
+
|
| 934 |
+
# Lid closure index (LCI) - combines blink rate and duration
|
| 935 |
+
lid_closure_index = 0
|
| 936 |
+
lid_closure_index_adjusted = 0
|
| 937 |
+
|
| 938 |
+
if avg_blink_duration > 0:
|
| 939 |
+
# Calculate the raw lid closure index (blink rate * avg duration)
|
| 940 |
+
lid_closure_index = blink_rate * avg_blink_duration
|
| 941 |
+
|
| 942 |
+
# Apply FPS-based adjustment for more accurate LCI measurement
|
| 943 |
+
# For low FPS, each blink is quantized to a full frame duration which inflates LCI
|
| 944 |
+
# Reference standard is 30 FPS
|
| 945 |
+
fps_factor = min(1.0, self.fps / 30.0) # Cap at 1.0 to avoid reducing values for high FPS
|
| 946 |
+
|
| 947 |
+
# If FPS is low, scale the LCI to compensate for quantization effects
|
| 948 |
+
lid_closure_index_adjusted = lid_closure_index * fps_factor
|
| 949 |
+
|
| 950 |
+
# Ensure values are reasonable (cap at 0.5 as theoretical maximum - 50% eye closure would be extreme)
|
| 951 |
+
lid_closure_index = min(0.5, lid_closure_index)
|
| 952 |
+
lid_closure_index_adjusted = min(0.5, lid_closure_index_adjusted)
|
| 953 |
+
|
| 954 |
+
# For extremely low FPS (under 15), use adjusted LCI as the primary metric
|
| 955 |
+
if self.fps < 15:
|
| 956 |
+
lid_closure_index = lid_closure_index_adjusted
|
| 957 |
+
|
| 958 |
+
# Attention stability score (10-point scale)
|
| 959 |
+
attention_stability_score = 10
|
| 960 |
+
if gaze_deviation_std > 0.05:
|
| 961 |
+
# Penalize for excessive gaze deviation variability
|
| 962 |
+
attention_stability_score -= min(5, gaze_deviation_std * 20)
|
| 963 |
+
elif gaze_deviation_std < 0.02 and perclos_percentage > 10:
|
| 964 |
+
# Penalize for too stable gaze combined with high PERCLOS (zoning out)
|
| 965 |
+
attention_stability_score -= min(4, (0.02 - gaze_deviation_std) * 100)
|
| 966 |
+
|
| 967 |
+
# Add capping rule for extremely low gaze deviation variability
|
| 968 |
+
# This indicates potentially mechanical, unnatural, or fixed gaze patterns
|
| 969 |
+
if gaze_deviation_std < 0.01:
|
| 970 |
+
attention_stability_score = min(attention_stability_score, 5)
|
| 971 |
+
|
| 972 |
+
# Baseline difference metrics
|
| 973 |
+
perclos_baseline_diff = 0
|
| 974 |
+
blink_duration_baseline_diff = 0
|
| 975 |
+
blink_rate_baseline_diff = 0
|
| 976 |
+
|
| 977 |
+
if self.baseline_calibration_complete:
|
| 978 |
+
if self.baseline_perclos is not None:
|
| 979 |
+
perclos_baseline_diff = smoothed_perclos - self.baseline_perclos
|
| 980 |
+
if self.baseline_blink_duration is not None and avg_blink_duration > 0:
|
| 981 |
+
blink_duration_baseline_diff = avg_blink_duration - self.baseline_blink_duration
|
| 982 |
+
if self.baseline_blink_rate is not None:
|
| 983 |
+
blink_rate_baseline_diff = full_blink_rate - self.baseline_blink_rate
|
| 984 |
+
|
| 985 |
+
# Current fatigue level from the most recent assessment
|
| 986 |
+
current_fatigue_level = self.current_fatigue_level
|
| 987 |
+
|
| 988 |
+
# Create the stats dictionary
|
| 989 |
+
stats = {
|
| 990 |
+
'total_frames': self.total_frames,
|
| 991 |
+
'total_duration_seconds': total_duration_seconds,
|
| 992 |
+
'eye_contact_frames': self.eye_contact_frames,
|
| 993 |
+
'eye_contact_percentage': eye_contact_percentage,
|
| 994 |
+
'total_eye_contact_time': eye_contact_time,
|
| 995 |
+
'eye_contact_duration_seconds': eye_contact_time, # For backward compatibility
|
| 996 |
+
'longest_eye_contact': self.longest_eye_contact,
|
| 997 |
+
'longest_eye_contact_seconds': self.longest_eye_contact, # For backward compatibility
|
| 998 |
+
'current_eye_contact_duration': current_eye_contact_duration,
|
| 999 |
+
'current_eye_contact_duration_seconds': current_eye_contact_duration, # For backward compatibility
|
| 1000 |
+
'contact_episodes': len(contact_episodes_for_stats),
|
| 1001 |
+
'contact_frequency': contact_frequency,
|
| 1002 |
+
'contact_frequency_per_minute': contact_frequency, # For backward compatibility
|
| 1003 |
+
'avg_contact_duration': avg_contact_duration,
|
| 1004 |
+
'average_contact_duration_seconds': avg_contact_duration, # For backward compatibility
|
| 1005 |
+
'total_blinks': self.total_blinks,
|
| 1006 |
+
'blink_rate': blink_rate,
|
| 1007 |
+
'blink_rate_per_minute': blink_rate, # For backward compatibility
|
| 1008 |
+
'avg_blink_duration': avg_blink_duration,
|
| 1009 |
+
'average_blink_duration_seconds': avg_blink_duration, # For backward compatibility
|
| 1010 |
+
'blink_duration_std': blink_duration_std,
|
| 1011 |
+
'blink_duration_variability': blink_duration_std, # For backward compatibility
|
| 1012 |
+
'avg_inter_blink_interval': avg_inter_blink_interval,
|
| 1013 |
+
'average_inter_blink_interval_seconds': avg_inter_blink_interval, # For backward compatibility
|
| 1014 |
+
'gaze_deviation_mean': gaze_deviation_mean,
|
| 1015 |
+
'gaze_deviation_std': gaze_deviation_std,
|
| 1016 |
+
'eye_aspect_ratio_mean': ear_mean,
|
| 1017 |
+
'eye_aspect_ratio_std': ear_std,
|
| 1018 |
+
|
| 1019 |
+
# Enhanced fatigue metrics
|
| 1020 |
+
'perclos_percentage': perclos_percentage,
|
| 1021 |
+
'smoothed_perclos': smoothed_perclos,
|
| 1022 |
+
'lid_closure_index': lid_closure_index,
|
| 1023 |
+
'lid_closure_index_raw': lid_closure_index_adjusted if self.fps < 15 else lid_closure_index, # Store original value for reference
|
| 1024 |
+
'lid_closure_index_adjusted': lid_closure_index_adjusted, # Store FPS-adjusted value
|
| 1025 |
+
'fps_for_lci': self.fps, # Store FPS for reference
|
| 1026 |
+
'current_blink_rate': full_blink_rate,
|
| 1027 |
+
'attention_stability_score': attention_stability_score,
|
| 1028 |
+
|
| 1029 |
+
# Personalized baseline comparisons
|
| 1030 |
+
'perclos_baseline_diff': perclos_baseline_diff,
|
| 1031 |
+
'blink_duration_baseline_diff': blink_duration_baseline_diff,
|
| 1032 |
+
'blink_rate_baseline_diff': blink_rate_baseline_diff,
|
| 1033 |
+
'baseline_calibration_complete': self.baseline_calibration_complete
|
| 1034 |
+
}
|
| 1035 |
+
|
| 1036 |
+
return stats
|
| 1037 |
+
|
| 1038 |
+
def get_interview_assessment(self, model_name=None, eye_contact_rubrics=None, blinking_rubrics=None, fatigue_rubrics=None, attention_rubrics=None):
|
| 1039 |
+
"""
|
| 1040 |
+
Analyze eye contact patterns in the context of an interview using LLM.
|
| 1041 |
+
|
| 1042 |
+
Args:
|
| 1043 |
+
model_name: The name of the LLM model to use (e.g., "gpt-4", "claude-3-sonnet", "llama3")
|
| 1044 |
+
If None, uses the heuristic method.
|
| 1045 |
+
|
| 1046 |
+
Returns:
|
| 1047 |
+
dict: Assessment of eye contact behavior with interview-specific metrics
|
| 1048 |
+
"""
|
| 1049 |
+
# If no model name is provided, fallback to heuristic method
|
| 1050 |
+
if model_name is None:
|
| 1051 |
+
return self.get_interview_assessment_heuristic()
|
| 1052 |
+
|
| 1053 |
+
stats = self.get_stats()
|
| 1054 |
+
|
| 1055 |
+
eye_contact_rubrics = """Consider these guidelines for eye contact in online interviews and self-introduction videos:
|
| 1056 |
+
- In self-recorded videos and online interviews, sustained eye contact (looking at the camera) is generally encouraged and considered positive.
|
| 1057 |
+
- Near 100 percent eye contact is acceptable and often preferred, as it conveys confidence, attentiveness, and professionalism.
|
| 1058 |
+
- Very brief eye contact episodes (< 1 second) may signal nervousness or lack of confidence.
|
| 1059 |
+
- Excessive side glances, looking away frequently, or avoiding the camera may suggest disengagement or discomfort.
|
| 1060 |
+
- A natural, stable gaze toward the camera lens helps establish connection with the viewer."""
|
| 1061 |
+
|
| 1062 |
+
|
| 1063 |
+
blinking_rubrics = """Consider these guidelines for professional interview blinking behavior:
|
| 1064 |
+
- Normal blink rate for adults during conversation is approximately 12-20 blinks per minute.
|
| 1065 |
+
- Very low blink rates (<10 blinks/min) may indicate intense focus, cognitive load, or suppressed emotions.
|
| 1066 |
+
- Excessive blinking (>20-25 blinks/min) often reflects nervousness, discomfort, or eye fatigue.
|
| 1067 |
+
- Normal blink duration ranges from 0.1 to 0.4 seconds.
|
| 1068 |
+
- Longer blinks (>0.4 seconds) may suggest fatigue, drowsiness, or disengagement.
|
| 1069 |
+
- Rapid, shallow blinks clustered together may indicate cognitive stress or nervousness."""
|
| 1070 |
+
|
| 1071 |
+
|
| 1072 |
+
fatigue_rubrics = """Consider these guidelines for professional interview fatigue indicators:
|
| 1073 |
+
- A PERCLOS (percentage of time eyes are closed) above 15 percent is a strong indicator of fatigue.
|
| 1074 |
+
- High variability in blink duration (standard deviation > 0.1 seconds) may suggest difficulty maintaining alertness.
|
| 1075 |
+
- A consistently high blink rate combined with longer blink durations may indicate drowsiness or cognitive fatigue.
|
| 1076 |
+
- Elevated Lid Closure Index (LCI) values can signal ocular fatigue but must be interpreted in context. Research suggests that values around 0.5-1.0 might be more indicative of actual fatigue
|
| 1077 |
+
- Fatigue can negatively impact the perception of interest, engagement, and professionalism in an interview."""
|
| 1078 |
+
|
| 1079 |
+
|
| 1080 |
+
attention_rubrics = """Consider these guidelines for professional interview attention patterns:
|
| 1081 |
+
- An attention stability score below 5 (on a 0-10 scale) suggests difficulty maintaining focus (based on internal system metrics).
|
| 1082 |
+
- Very low gaze deviation variability (< 0.03) may indicate 'zoning out' or mental disengagement.
|
| 1083 |
+
- Very high gaze deviation variability (> 0.25) may suggest distractibility or cognitive overload.
|
| 1084 |
+
- Consistent and moderate gaze variability is crucial for demonstrating attentiveness and engagement in interviews."""
|
| 1085 |
+
|
| 1086 |
+
|
| 1087 |
+
|
| 1088 |
+
# Define the system prompt for the LLM
|
| 1089 |
+
system_prompt = f"""You are an expert interview coach specializing in non-verbal communication.
|
| 1090 |
+
Analyze the eye contact, blinking statistics, fatigue indicators, and attention patterns provided to assess how effective the person's non-verbal behavior would be in a professional interview context.
|
| 1091 |
+
|
| 1092 |
+
Eye contact rubrics:
|
| 1093 |
+
{eye_contact_rubrics}
|
| 1094 |
+
|
| 1095 |
+
Blinking rubrics:
|
| 1096 |
+
{blinking_rubrics}
|
| 1097 |
+
|
| 1098 |
+
Fatigue rubrics:
|
| 1099 |
+
{fatigue_rubrics}
|
| 1100 |
+
|
| 1101 |
+
Attention rubrics:
|
| 1102 |
+
{attention_rubrics}
|
| 1103 |
+
|
| 1104 |
+
Based on these guidelines, analyze the provided statistics and provide as a valid JSON object:
|
| 1105 |
+
1. A score from 0-10
|
| 1106 |
+
2. A brief assessment of the overall non-verbal communication quality (1-2 sentences)
|
| 1107 |
+
3. Specific patterns detected in eye contact, blinking, fatigue, and attention (list of bullet points)
|
| 1108 |
+
4. Fatigue indicators observed (list of bullet points)
|
| 1109 |
+
5. Attention quality assessment (list of bullet points)
|
| 1110 |
+
6. Actionable recommendations for improvement (list of bullet points)
|
| 1111 |
+
|
| 1112 |
+
Never use backticks like this: ```json or ``` in your response.
|
| 1113 |
+
"""
|
| 1114 |
+
|
| 1115 |
+
# Format the stats for the human prompt
|
| 1116 |
+
stats_json = json.dumps(stats, indent=2)
|
| 1117 |
+
human_prompt = f"""Please analyze these eye contact, blinking, fatigue and attention statistics for an interview scenario:
|
| 1118 |
+
{stats_json}
|
| 1119 |
+
|
| 1120 |
+
You are tasked with evaluating the candidate's non-verbal behavior to assist the interviewer in making an informed decision about the candidate's suitability for the role. Your analysis should focus on eye contact, blinking patterns, fatigue indicators, and attention levels, as observed in the interview video. The assessment must be professional, clear, and objective, providing meaningful insights into the candidate's engagement, confidence, and alertness.
|
| 1121 |
+
|
| 1122 |
+
Your analysis must be structured as a valid JSON object with the following keys:
|
| 1123 |
+
- score: A numerical score from 0 to 10 reflecting the overall quality of non-verbal behavior.
|
| 1124 |
+
- assessment: A brief summary of the candidate's non-verbal communication performance.
|
| 1125 |
+
- patterns: A list of specific behavioral patterns detected (e.g., sustained eye contact, excessive blinking).
|
| 1126 |
+
- fatigue_indicators: A list of signs suggesting potential fatigue or lack of alertness.
|
| 1127 |
+
- attention_indicators: A list of signs reflecting the candidate's level of focus and attention.
|
| 1128 |
+
- recommendations: A list of clear and actionable suggestions to help the candidate improve their non-verbal communication in future interviews.
|
| 1129 |
+
|
| 1130 |
+
Ensure that the assessment is concise, easy to understand, and professionally written, providing the interviewer with valuable guidance on the candidate's overall interview performance."""
|
| 1131 |
+
|
| 1132 |
+
# Choose the appropriate LLM based on model_name
|
| 1133 |
+
if "gpt" in model_name.lower():
|
| 1134 |
+
print(f"Using GPT model: {model_name}")
|
| 1135 |
+
llm = ChatOpenAI(model_name=model_name, temperature=0.3)
|
| 1136 |
+
messages = [
|
| 1137 |
+
SystemMessage(content=system_prompt),
|
| 1138 |
+
HumanMessage(content=human_prompt)
|
| 1139 |
+
]
|
| 1140 |
+
response = llm.invoke(messages)
|
| 1141 |
+
llm_analysis = response.content
|
| 1142 |
+
elif "claude" in model_name.lower():
|
| 1143 |
+
print(f"Using Claude model: {model_name}")
|
| 1144 |
+
llm = ChatAnthropic(model_name=model_name, temperature=0.3)
|
| 1145 |
+
messages = [
|
| 1146 |
+
SystemMessage(content=system_prompt),
|
| 1147 |
+
HumanMessage(content=human_prompt)
|
| 1148 |
+
]
|
| 1149 |
+
response = llm.invoke(messages)
|
| 1150 |
+
llm_analysis = response.content
|
| 1151 |
+
elif "llama" in model_name.lower():
|
| 1152 |
+
print(f"Using Llama model: {model_name}")
|
| 1153 |
+
llm = ChatGroq(model_name=model_name, temperature=0.3)
|
| 1154 |
+
messages = [
|
| 1155 |
+
SystemMessage(content=system_prompt),
|
| 1156 |
+
HumanMessage(content=human_prompt)
|
| 1157 |
+
]
|
| 1158 |
+
response = llm.invoke(messages)
|
| 1159 |
+
llm_analysis = response.content
|
| 1160 |
+
else:
|
| 1161 |
+
# Fallback to the heuristic method if model not supported
|
| 1162 |
+
print(f"Model {model_name} not supported")
|
| 1163 |
+
raise ValueError(f"Model {model_name} not supported")
|
| 1164 |
+
#return self.get_interview_assessment_heuristic()
|
| 1165 |
+
|
| 1166 |
+
try:
|
| 1167 |
+
# Parse the JSON response
|
| 1168 |
+
assessment = json.loads(llm_analysis.replace("```json", "").replace("```", ""))
|
| 1169 |
+
# Ensure the expected keys are present
|
| 1170 |
+
required_keys = ["score", "assessment", "patterns", "fatigue_indicators", "attention_indicators", "recommendations"]
|
| 1171 |
+
for key in required_keys:
|
| 1172 |
+
if key not in assessment:
|
| 1173 |
+
assessment[key] = []
|
| 1174 |
+
return assessment
|
| 1175 |
+
except (json.JSONDecodeError, TypeError):
|
| 1176 |
+
# Fallback to heuristic assessment if LLM response can't be parsed
|
| 1177 |
+
raise ValueError(f"LLM response can't be parsed: {llm_analysis}")
|
| 1178 |
+
#return self.get_interview_assessment_heuristic()
|
| 1179 |
+
|
| 1180 |
+
def get_interview_assessment_heuristic(self):
|
| 1181 |
+
"""
|
| 1182 |
+
Analyze eye contact patterns in the context of an interview.
|
| 1183 |
+
|
| 1184 |
+
Returns:
|
| 1185 |
+
dict: Assessment of eye contact behavior with interview-specific metrics
|
| 1186 |
+
"""
|
| 1187 |
+
stats = self.get_stats()
|
| 1188 |
+
|
| 1189 |
+
# Define thresholds for interview assessment
|
| 1190 |
+
assessment = {
|
| 1191 |
+
'score': 0, # 0-10 scale
|
| 1192 |
+
'assessment': '',
|
| 1193 |
+
'patterns': [],
|
| 1194 |
+
'fatigue_indicators': [],
|
| 1195 |
+
'attention_indicators': [],
|
| 1196 |
+
'recommendations': []
|
| 1197 |
+
}
|
| 1198 |
+
|
| 1199 |
+
# Calculate base score from 0-10
|
| 1200 |
+
base_score = min(stats['eye_contact_percentage'] / 7, 10) # Optimal is around 70%
|
| 1201 |
+
|
| 1202 |
+
# Adjust for patterns
|
| 1203 |
+
if stats['longest_eye_contact'] > 10:
|
| 1204 |
+
base_score -= 1
|
| 1205 |
+
assessment['patterns'].append('Extended staring detected')
|
| 1206 |
+
assessment['recommendations'].append('Avoid extended eye contact over 5-7 seconds as it can create discomfort')
|
| 1207 |
+
|
| 1208 |
+
if stats['avg_contact_duration'] < 1:
|
| 1209 |
+
base_score -= 2
|
| 1210 |
+
assessment['patterns'].append('Very brief eye contact episodes')
|
| 1211 |
+
assessment['recommendations'].append('Try to maintain eye contact for 2-3 seconds when speaking or listening')
|
| 1212 |
+
|
| 1213 |
+
if stats['eye_contact_percentage'] < 30:
|
| 1214 |
+
base_score -= 2
|
| 1215 |
+
assessment['patterns'].append('Insufficient overall eye contact')
|
| 1216 |
+
assessment['recommendations'].append('Aim for 60-70% eye contact during professional conversations')
|
| 1217 |
+
|
| 1218 |
+
if stats['eye_contact_percentage'] > 85:
|
| 1219 |
+
base_score -= 1
|
| 1220 |
+
assessment['patterns'].append('Excessive eye contact')
|
| 1221 |
+
assessment['recommendations'].append('Too much constant eye contact can appear intimidating. Try occasional natural breaks')
|
| 1222 |
+
|
| 1223 |
+
if stats['gaze_deviation_std'] < 0.05:
|
| 1224 |
+
base_score -= 1
|
| 1225 |
+
assessment['patterns'].append('Unnatural or static gaze')
|
| 1226 |
+
assessment['recommendations'].append('Allow natural movement of gaze during conversation')
|
| 1227 |
+
|
| 1228 |
+
# Analyze blink behavior
|
| 1229 |
+
normal_blink_rate_min = 12 # Normal blink rate for adults is ~12-15 per minute during conversation
|
| 1230 |
+
normal_blink_rate_max = 20
|
| 1231 |
+
normal_blink_duration_min = 0.1 # Normal blink duration is ~0.1-0.4 seconds
|
| 1232 |
+
normal_blink_duration_max = 0.4
|
| 1233 |
+
|
| 1234 |
+
# Adjust score based on blink rate
|
| 1235 |
+
if 'blink_rate' in stats:
|
| 1236 |
+
if stats['blink_rate'] < normal_blink_rate_min:
|
| 1237 |
+
base_score -= 0.5
|
| 1238 |
+
assessment['patterns'].append(f'Low blink rate ({stats["blink_rate"]:.1f} per minute)')
|
| 1239 |
+
assessment['recommendations'].append('Your blink rate is lower than normal, which can indicate anxiety or intense focus. Try to relax and blink naturally.')
|
| 1240 |
+
elif stats['blink_rate'] > normal_blink_rate_max:
|
| 1241 |
+
base_score -= 1
|
| 1242 |
+
assessment['patterns'].append(f'Excessive blinking ({stats["blink_rate"]:.1f} per minute)')
|
| 1243 |
+
assessment['recommendations'].append('Your blink rate is higher than normal, which can indicate nervousness. Practice relaxation techniques before interviews.')
|
| 1244 |
+
|
| 1245 |
+
# Adjust score based on blink duration
|
| 1246 |
+
if 'avg_blink_duration' in stats and stats['blink_durations']:
|
| 1247 |
+
if stats['avg_blink_duration'] > normal_blink_duration_max:
|
| 1248 |
+
base_score -= 0.5
|
| 1249 |
+
assessment['patterns'].append('Slower than normal blinks')
|
| 1250 |
+
assessment['recommendations'].append('Your blinks are longer than average, which might make you appear tired or disengaged.')
|
| 1251 |
+
elif stats['avg_blink_duration'] < normal_blink_duration_min:
|
| 1252 |
+
assessment['patterns'].append('Quick, rapid blinks')
|
| 1253 |
+
|
| 1254 |
+
# Analyze fatigue indicators
|
| 1255 |
+
# PERCLOS (percentage of time eyes are closed) above 15% indicates fatigue
|
| 1256 |
+
if stats['perclos_percentage'] > 15:
|
| 1257 |
+
assessment['fatigue_indicators'].append(f'High percentage of time with eyes nearly closed ({stats["perclos_percentage"]:.1f}%)')
|
| 1258 |
+
assessment['recommendations'].append('Your eye closure patterns suggest potential fatigue. Consider getting adequate rest before important interviews.')
|
| 1259 |
+
base_score -= 1
|
| 1260 |
+
|
| 1261 |
+
# High blink duration variability suggests struggling to maintain alertness
|
| 1262 |
+
if stats['blink_duration_std'] > 0.1:
|
| 1263 |
+
assessment['fatigue_indicators'].append('Inconsistent blink patterns (alternating between quick and slow blinks)')
|
| 1264 |
+
assessment['recommendations'].append('Your variable blink patterns may indicate fluctuating alertness levels. Consider scheduling interviews during your peak energy hours.')
|
| 1265 |
+
base_score -= 0.5
|
| 1266 |
+
|
| 1267 |
+
# Lid closure index analysis - but use the appropriate LCI value based on FPS
|
| 1268 |
+
lci_threshold = 0.06 # Standard threshold for normal FPS
|
| 1269 |
+
|
| 1270 |
+
# Determine which LCI value to use for assessment
|
| 1271 |
+
lci_value = stats.get('lid_closure_index', 0)
|
| 1272 |
+
low_fps = 'fps_for_lci' in stats and stats['fps_for_lci'] < 15
|
| 1273 |
+
|
| 1274 |
+
if low_fps and 'lid_closure_index_adjusted' in stats:
|
| 1275 |
+
# Use the adjusted LCI for low FPS videos
|
| 1276 |
+
lci_value = stats['lid_closure_index_adjusted']
|
| 1277 |
+
lci_note = f" (adjusted for {stats['fps_for_lci']:.1f} FPS)"
|
| 1278 |
+
else:
|
| 1279 |
+
lci_note = ""
|
| 1280 |
+
|
| 1281 |
+
# Apply the LCI threshold check
|
| 1282 |
+
if lci_value > lci_threshold:
|
| 1283 |
+
assessment['fatigue_indicators'].append(f'Slow eye reopening after blinks (lid closure index: {lci_value:.3f}{lci_note})')
|
| 1284 |
+
assessment['recommendations'].append('Practice eye exercises before interviews to improve ocular muscle responsiveness and reduce signs of fatigue.')
|
| 1285 |
+
base_score -= 0.5
|
| 1286 |
+
|
| 1287 |
+
# Analyze attention indicators
|
| 1288 |
+
# Very low gaze deviation could indicate "zoning out" or mental exhaustion
|
| 1289 |
+
# but only if combined with other indicators like low EAR or high PERCLOS
|
| 1290 |
+
if stats['gaze_deviation_std'] < 0.03 and stats['perclos_percentage'] > 10 and stats['eye_aspect_ratio_std'] < 0.05:
|
| 1291 |
+
assessment['attention_indicators'].append('Potential signs of zoning out or low engagement')
|
| 1292 |
+
assessment['recommendations'].append('Try to maintain a more varied and natural gaze pattern during conversations, even when focused.')
|
| 1293 |
+
base_score -= 0.5
|
| 1294 |
+
|
| 1295 |
+
# Extremely high gaze deviation indicates difficulty maintaining attention
|
| 1296 |
+
if stats['gaze_deviation_std'] > 0.25:
|
| 1297 |
+
assessment['attention_indicators'].append('Highly variable or erratic gaze patterns')
|
| 1298 |
+
assessment['recommendations'].append('Your rapidly shifting gaze may indicate distractibility. Practice focused attention exercises before interviews.')
|
| 1299 |
+
base_score -= 1
|
| 1300 |
+
|
| 1301 |
+
# Attention stability score below 5 suggests attention issues
|
| 1302 |
+
if stats['attention_stability_score'] < 5:
|
| 1303 |
+
assessment['attention_indicators'].append('Low attention stability during conversation')
|
| 1304 |
+
assessment['recommendations'].append('Practice mindfulness techniques to improve your ability to maintain consistent attention during interviews.')
|
| 1305 |
+
base_score -= 0.5
|
| 1306 |
+
|
| 1307 |
+
# Clamp score to 0-10 range
|
| 1308 |
+
assessment['score'] = max(0, min(10, base_score))
|
| 1309 |
+
|
| 1310 |
+
# Generate overall assessment
|
| 1311 |
+
if assessment['score'] >= 8:
|
| 1312 |
+
assessment['assessment'] = 'Excellent eye contact patterns suitable for professional interviews.'
|
| 1313 |
+
elif assessment['score'] >= 6:
|
| 1314 |
+
assessment['assessment'] = 'Good eye contact with minor opportunities for improvement.'
|
| 1315 |
+
elif assessment['score'] >= 4:
|
| 1316 |
+
assessment['assessment'] = 'Adequate eye contact but needs improvement for professional settings.'
|
| 1317 |
+
else:
|
| 1318 |
+
assessment['assessment'] = 'Poor eye contact that may negatively impact interview perception.'
|
| 1319 |
+
|
| 1320 |
+
# If no specific patterns were detected, add a general recommendation
|
| 1321 |
+
if not assessment['patterns']:
|
| 1322 |
+
assessment['patterns'].append('No specific issues detected')
|
| 1323 |
+
|
| 1324 |
+
if not assessment['recommendations']:
|
| 1325 |
+
assessment['recommendations'].append('Continue current eye contact patterns')
|
| 1326 |
+
|
| 1327 |
+
# Add fatigue assessment if indicators were found
|
| 1328 |
+
if assessment['fatigue_indicators']:
|
| 1329 |
+
fatigue_level = len(assessment['fatigue_indicators'])
|
| 1330 |
+
if fatigue_level >= 2:
|
| 1331 |
+
assessment['assessment'] += ' Signs of significant fatigue were detected, which could impact interview performance.'
|
| 1332 |
+
else:
|
| 1333 |
+
assessment['assessment'] += ' Mild fatigue indicators were observed.'
|
| 1334 |
+
else:
|
| 1335 |
+
assessment['fatigue_indicators'].append('No significant fatigue indicators detected')
|
| 1336 |
+
|
| 1337 |
+
# Add attention assessment if indicators were found
|
| 1338 |
+
if assessment['attention_indicators']:
|
| 1339 |
+
attention_issues = len(assessment['attention_indicators'])
|
| 1340 |
+
if attention_issues >= 2:
|
| 1341 |
+
assessment['assessment'] += ' Your attention patterns suggest difficulty maintaining consistent focus.'
|
| 1342 |
+
else:
|
| 1343 |
+
assessment['assessment'] += ' Minor attention inconsistencies were observed.'
|
| 1344 |
+
else:
|
| 1345 |
+
assessment['attention_indicators'].append('Attention appears stable and appropriate')
|
| 1346 |
+
|
| 1347 |
+
return assessment
|
| 1348 |
+
|
| 1349 |
+
|
| 1350 |
+
def analyze_eye_contact(frame, analyzer=None, annotate=False):
|
| 1351 |
+
"""
|
| 1352 |
+
Analyze eye contact in a single frame.
|
| 1353 |
+
|
| 1354 |
+
Args:
|
| 1355 |
+
frame: The video frame (BGR format)
|
| 1356 |
+
analyzer: An existing EyeContactAnalyzer instance, or None to create a new one
|
| 1357 |
+
annotate: Whether to annotate the frame with visualization
|
| 1358 |
+
|
| 1359 |
+
Returns:
|
| 1360 |
+
tuple: (metrics, analyzer, annotated_frame)
|
| 1361 |
+
- metrics: Dictionary of eye contact metrics for this frame
|
| 1362 |
+
- analyzer: The EyeContactAnalyzer instance (new or updated)
|
| 1363 |
+
- annotated_frame: The frame with annotations if requested
|
| 1364 |
+
"""
|
| 1365 |
+
if analyzer is None:
|
| 1366 |
+
analyzer = EyeContactAnalyzer()
|
| 1367 |
+
|
| 1368 |
+
metrics, annotated_frame = analyzer.process_frame(frame, annotate)
|
| 1369 |
+
return metrics, analyzer, annotated_frame
|
| 1370 |
+
|
| 1371 |
+
|
| 1372 |
+
def analyze_video_blinks(video_path, output_path=None, show_preview=True):
|
| 1373 |
+
"""
|
| 1374 |
+
Analyze eye blinks in a video file.
|
| 1375 |
+
|
| 1376 |
+
Args:
|
| 1377 |
+
video_path: Path to the input video file
|
| 1378 |
+
output_path: Path to save the output video (None = don't save)
|
| 1379 |
+
show_preview: Whether to display the video during processing
|
| 1380 |
+
|
| 1381 |
+
Returns:
|
| 1382 |
+
tuple: (stats, assessment) with eye contact and blink statistics
|
| 1383 |
+
"""
|
| 1384 |
+
# Open video file
|
| 1385 |
+
cap = cv2.VideoCapture(video_path)
|
| 1386 |
+
if not cap.isOpened():
|
| 1387 |
+
print(f"Error: Could not open video file {video_path}")
|
| 1388 |
+
return None, None
|
| 1389 |
+
|
| 1390 |
+
# Get video properties
|
| 1391 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 1392 |
+
if fps <= 0:
|
| 1393 |
+
fps = 30 # Default to 30fps if unable to determine
|
| 1394 |
+
print(f"Warning: Could not determine video FPS, using default of {fps}")
|
| 1395 |
+
else:
|
| 1396 |
+
print(f"Video FPS: {fps}")
|
| 1397 |
+
|
| 1398 |
+
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 1399 |
+
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 1400 |
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 1401 |
+
video_duration = total_frames / fps # Calculate the actual video duration
|
| 1402 |
+
|
| 1403 |
+
print(f"Video properties: {frame_width}x{frame_height}, {fps} fps, {total_frames} frames")
|
| 1404 |
+
print(f"Video duration: {video_duration:.2f} seconds")
|
| 1405 |
+
|
| 1406 |
+
# Set up video writer if output path is provided
|
| 1407 |
+
writer = None
|
| 1408 |
+
if output_path:
|
| 1409 |
+
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 1410 |
+
writer = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
|
| 1411 |
+
|
| 1412 |
+
# Initialize analyzer with improved settings
|
| 1413 |
+
analyzer = EyeContactAnalyzer(
|
| 1414 |
+
blink_threshold=0.17, # Lower threshold to catch more blinks
|
| 1415 |
+
ear_threshold=0.21, # Higher threshold for detecting eyes fully open
|
| 1416 |
+
blink_consec_frames=1, # Detect blinks that last only 1 frame
|
| 1417 |
+
max_blink_duration=0.4, # Maximum blink duration (seconds)
|
| 1418 |
+
ear_drop_threshold=0.035, # Detect blinks based on EAR drops
|
| 1419 |
+
use_adaptive_blink_threshold=True,
|
| 1420 |
+
use_ear_drop_detection=True, # Enable EAR drop detection
|
| 1421 |
+
fps=fps # Pass the actual video fps for accurate duration calculation
|
| 1422 |
+
)
|
| 1423 |
+
|
| 1424 |
+
# Use a fixed start time of 0.0 instead of wall-clock time for accurate time calculations
|
| 1425 |
+
analyzer.start_time = 0.0
|
| 1426 |
+
analyzer.debug_mode = True
|
| 1427 |
+
|
| 1428 |
+
frame_count = 0
|
| 1429 |
+
early_stop = False
|
| 1430 |
+
|
| 1431 |
+
# Process each frame
|
| 1432 |
+
while cap.isOpened() and not early_stop:
|
| 1433 |
+
ret, frame = cap.read()
|
| 1434 |
+
if not ret:
|
| 1435 |
+
break
|
| 1436 |
+
|
| 1437 |
+
# Process the frame
|
| 1438 |
+
metrics, analyzer, annotated_frame = analyze_eye_contact(frame, analyzer, True)
|
| 1439 |
+
|
| 1440 |
+
# Add progress info
|
| 1441 |
+
frame_count += 1
|
| 1442 |
+
progress = (frame_count / total_frames) * 100
|
| 1443 |
+
cv2.putText(annotated_frame, f"Progress: {progress:.1f}% (Frame {frame_count}/{total_frames})",
|
| 1444 |
+
(20, frame_height - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
|
| 1445 |
+
|
| 1446 |
+
# Write frame to output video if needed
|
| 1447 |
+
if writer:
|
| 1448 |
+
writer.write(annotated_frame)
|
| 1449 |
+
|
| 1450 |
+
# Show preview if requested
|
| 1451 |
+
if show_preview:
|
| 1452 |
+
cv2.imshow("Video Analysis", annotated_frame)
|
| 1453 |
+
key = cv2.waitKey(1) & 0xFF
|
| 1454 |
+
if key == ord('q'):
|
| 1455 |
+
early_stop = True
|
| 1456 |
+
elif key == ord('d'):
|
| 1457 |
+
# Toggle debug mode
|
| 1458 |
+
analyzer.debug_mode = not analyzer.debug_mode
|
| 1459 |
+
print(f"Debug mode: {'ON' if analyzer.debug_mode else 'OFF'}")
|
| 1460 |
+
elif key == ord('s'):
|
| 1461 |
+
# Save current frame as image
|
| 1462 |
+
cv2.imwrite(f"blink_frame_{frame_count}.jpg", annotated_frame)
|
| 1463 |
+
print(f"Saved frame {frame_count} to disk")
|
| 1464 |
+
|
| 1465 |
+
# Print periodic updates
|
| 1466 |
+
if frame_count % 100 == 0:
|
| 1467 |
+
print(f"Processed {frame_count}/{total_frames} frames ({progress:.1f}%)")
|
| 1468 |
+
print(f"Current blink count: {analyzer.total_blinks}")
|
| 1469 |
+
|
| 1470 |
+
# Clean up
|
| 1471 |
+
cap.release()
|
| 1472 |
+
if writer:
|
| 1473 |
+
writer.release()
|
| 1474 |
+
cv2.destroyAllWindows()
|
| 1475 |
+
|
| 1476 |
+
# Get statistics
|
| 1477 |
+
stats = analyzer.get_stats()
|
| 1478 |
+
|
| 1479 |
+
# Calculate actual video duration based on frames and FPS
|
| 1480 |
+
video_duration = total_frames / fps
|
| 1481 |
+
|
| 1482 |
+
# Calculate scaling factor in case there's a discrepancy between durations
|
| 1483 |
+
# This helps correct any timing issues that might have accumulated
|
| 1484 |
+
scaling_factor = video_duration / stats['total_duration_seconds'] if stats['total_duration_seconds'] > 0 else 1.0
|
| 1485 |
+
|
| 1486 |
+
# Update ALL time-based metrics to ensure consistency with actual video duration
|
| 1487 |
+
stats['total_duration_seconds'] = video_duration
|
| 1488 |
+
|
| 1489 |
+
# Update primary eye contact metrics
|
| 1490 |
+
if stats['eye_contact_percentage'] > 0:
|
| 1491 |
+
stats['total_eye_contact_time'] = (stats['eye_contact_percentage'] / 100) * video_duration
|
| 1492 |
+
stats['eye_contact_duration_seconds'] = stats['total_eye_contact_time']
|
| 1493 |
+
|
| 1494 |
+
# Scale other time-based metrics directly
|
| 1495 |
+
stats['longest_eye_contact'] *= scaling_factor
|
| 1496 |
+
stats['longest_eye_contact_seconds'] = stats['longest_eye_contact']
|
| 1497 |
+
stats['current_eye_contact_duration'] *= scaling_factor
|
| 1498 |
+
stats['current_eye_contact_duration_seconds'] = stats['current_eye_contact_duration']
|
| 1499 |
+
stats['avg_contact_duration'] *= scaling_factor
|
| 1500 |
+
stats['average_contact_duration_seconds'] = stats['avg_contact_duration']
|
| 1501 |
+
|
| 1502 |
+
# Update blink timing metrics
|
| 1503 |
+
if stats['total_blinks'] > 0:
|
| 1504 |
+
stats['blink_rate'] = stats['total_blinks'] / (video_duration / 60)
|
| 1505 |
+
stats['blink_rate_per_minute'] = stats['blink_rate']
|
| 1506 |
+
|
| 1507 |
+
stats['avg_blink_duration'] *= scaling_factor
|
| 1508 |
+
stats['average_blink_duration_seconds'] = stats['avg_blink_duration']
|
| 1509 |
+
stats['avg_inter_blink_interval'] *= scaling_factor
|
| 1510 |
+
stats['average_inter_blink_interval_seconds'] = stats['avg_inter_blink_interval']
|
| 1511 |
+
|
| 1512 |
+
# Recalculate LCI based on scaled values with FPS adjustment
|
| 1513 |
+
if 'avg_blink_duration' in stats and stats['avg_blink_duration'] > 0:
|
| 1514 |
+
# Calculate raw LCI
|
| 1515 |
+
raw_lci = stats['blink_rate'] * stats['avg_blink_duration']
|
| 1516 |
+
|
| 1517 |
+
# Apply FPS adjustment for low frame rate videos
|
| 1518 |
+
fps_factor = min(1.0, fps / 30.0) # Cap at 1.0 to avoid reducing values for high FPS
|
| 1519 |
+
adjusted_lci = raw_lci * fps_factor
|
| 1520 |
+
|
| 1521 |
+
# Ensure values are reasonable (theoretical max around 0.5)
|
| 1522 |
+
stats['lid_closure_index_raw'] = min(0.5, raw_lci)
|
| 1523 |
+
stats['lid_closure_index_adjusted'] = min(0.5, adjusted_lci)
|
| 1524 |
+
|
| 1525 |
+
# For low FPS videos, use the adjusted value as the primary LCI
|
| 1526 |
+
if fps < 15:
|
| 1527 |
+
stats['lid_closure_index'] = stats['lid_closure_index_adjusted']
|
| 1528 |
+
else:
|
| 1529 |
+
stats['lid_closure_index'] = stats['lid_closure_index_raw']
|
| 1530 |
+
|
| 1531 |
+
# Get assessment
|
| 1532 |
+
try:
|
| 1533 |
+
assessment = analyzer.get_interview_assessment_heuristic()
|
| 1534 |
+
except Exception as e:
|
| 1535 |
+
print(f"Error generating assessment: {e}")
|
| 1536 |
+
assessment = None
|
| 1537 |
+
|
| 1538 |
+
# Print results
|
| 1539 |
+
print("\n--- Video Analysis Results ---")
|
| 1540 |
+
print(f"Total frames processed: {frame_count}")
|
| 1541 |
+
print(f"Total duration: {stats['total_duration_seconds']:.2f} seconds")
|
| 1542 |
+
print(f"Total detected blinks: {stats['total_blinks']}")
|
| 1543 |
+
print(f"Blink rate: {stats['blink_rate']:.2f} blinks/minute")
|
| 1544 |
+
if stats['avg_blink_duration'] > 0:
|
| 1545 |
+
print(f"Average blink duration: {stats['avg_blink_duration']*1000:.2f} ms")
|
| 1546 |
+
|
| 1547 |
+
# Print fatigue and attention metrics
|
| 1548 |
+
print("\n--- Fatigue and Attention Metrics ---")
|
| 1549 |
+
print(f"PERCLOS (eye closure percentage): {stats['perclos_percentage']:.2f}%")
|
| 1550 |
+
print(f"Blink duration variability: {stats['blink_duration_std']:.4f}")
|
| 1551 |
+
|
| 1552 |
+
# Print LCI with adjustment information for transparency
|
| 1553 |
+
if fps < 15:
|
| 1554 |
+
print(f"LCI (low FPS - adjusted): {stats['lid_closure_index']:.4f} (raw: {stats['lid_closure_index_raw']:.4f}, adjusted for {fps:.1f} FPS)")
|
| 1555 |
+
else:
|
| 1556 |
+
print(f"Lid closure index: {stats['lid_closure_index']:.4f}")
|
| 1557 |
+
|
| 1558 |
+
print(f"Attention stability score (0-10): {stats['attention_stability_score']:.2f}")
|
| 1559 |
+
|
| 1560 |
+
# Print assessment
|
| 1561 |
+
if assessment:
|
| 1562 |
+
print("\n--- Interview Assessment ---")
|
| 1563 |
+
print(f"Overall score: {assessment['score']:.1f}/10")
|
| 1564 |
+
print(f"Assessment: {assessment['assessment']}")
|
| 1565 |
+
|
| 1566 |
+
print("\nDetected patterns:")
|
| 1567 |
+
for pattern in assessment['patterns']:
|
| 1568 |
+
print(f"- {pattern}")
|
| 1569 |
+
|
| 1570 |
+
print("\nFatigue indicators:")
|
| 1571 |
+
for indicator in assessment['fatigue_indicators']:
|
| 1572 |
+
print(f"- {indicator}")
|
| 1573 |
+
|
| 1574 |
+
print("\nAttention indicators:")
|
| 1575 |
+
for indicator in assessment['attention_indicators']:
|
| 1576 |
+
print(f"- {indicator}")
|
| 1577 |
+
|
| 1578 |
+
print("\nRecommendations:")
|
| 1579 |
+
for rec in assessment['recommendations']:
|
| 1580 |
+
print(f"- {rec}")
|
| 1581 |
+
|
| 1582 |
+
return stats, assessment
|
| 1583 |
+
|
| 1584 |
+
|
| 1585 |
+
|
| 1586 |
+
def analyze_video_file(video_path, display_video=True, save_results=True, model_name="gpt-4o-mini"):
|
| 1587 |
+
"""
|
| 1588 |
+
Analyze eye contact in a video file and get statistics.
|
| 1589 |
+
|
| 1590 |
+
Args:
|
| 1591 |
+
video_path: Path to the video file
|
| 1592 |
+
display_video: Whether to display the video during analysis
|
| 1593 |
+
save_results: Whether to save results to a JSON file
|
| 1594 |
+
model_name: The name of the model to use for the assessment
|
| 1595 |
+
Returns:
|
| 1596 |
+
dict: Eye contact statistics and assessment
|
| 1597 |
+
"""
|
| 1598 |
+
# Open the video file
|
| 1599 |
+
cap = cv2.VideoCapture(video_path)
|
| 1600 |
+
if not cap.isOpened():
|
| 1601 |
+
print(f"Error: Could not open video file {video_path}")
|
| 1602 |
+
return None
|
| 1603 |
+
|
| 1604 |
+
# Get video properties
|
| 1605 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 1606 |
+
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 1607 |
+
duration = frame_count / fps if fps > 0 else 0
|
| 1608 |
+
|
| 1609 |
+
print(f"Analyzing video: {video_path}")
|
| 1610 |
+
print(f"Video properties: {frame_count} frames, {fps:.2f} FPS, {duration:.2f} seconds")
|
| 1611 |
+
|
| 1612 |
+
# Initialize analyzer with the correct FPS
|
| 1613 |
+
analyzer = EyeContactAnalyzer(fps=fps)
|
| 1614 |
+
# Use a fixed start time of 0.0 instead of wall-clock time for accurate timing
|
| 1615 |
+
analyzer.start_time = 0.0
|
| 1616 |
+
frame_number = 0
|
| 1617 |
+
|
| 1618 |
+
# Variables for FPS calculation
|
| 1619 |
+
prev_time = time.time()
|
| 1620 |
+
fps_counter = 0
|
| 1621 |
+
processing_fps = 0
|
| 1622 |
+
|
| 1623 |
+
# Process each frame
|
| 1624 |
+
while cap.isOpened():
|
| 1625 |
+
ret, frame = cap.read()
|
| 1626 |
+
if not ret:
|
| 1627 |
+
break
|
| 1628 |
+
|
| 1629 |
+
# Process the frame
|
| 1630 |
+
metrics, analyzer, annotated_frame = analyze_eye_contact(frame, analyzer, display_video)
|
| 1631 |
+
|
| 1632 |
+
# Calculate processing FPS
|
| 1633 |
+
fps_counter += 1
|
| 1634 |
+
current_time = time.time()
|
| 1635 |
+
if current_time - prev_time >= 1.0: # Update FPS every second
|
| 1636 |
+
processing_fps = fps_counter / (current_time - prev_time)
|
| 1637 |
+
fps_counter = 0
|
| 1638 |
+
prev_time = current_time
|
| 1639 |
+
|
| 1640 |
+
# Display progress
|
| 1641 |
+
frame_number += 1
|
| 1642 |
+
progress = (frame_number / frame_count) * 100 if frame_count > 0 else 0
|
| 1643 |
+
print(f"\rProgress: {progress:.1f}% (Frame {frame_number}/{frame_count})", end="")
|
| 1644 |
+
|
| 1645 |
+
# Calculate current video time
|
| 1646 |
+
current_video_time = frame_number / fps if fps > 0 else 0
|
| 1647 |
+
minutes = int(current_video_time // 60)
|
| 1648 |
+
seconds = int(current_video_time % 60)
|
| 1649 |
+
|
| 1650 |
+
# Display the frame if requested
|
| 1651 |
+
if display_video:
|
| 1652 |
+
# Add progress information to the frame
|
| 1653 |
+
cv2.putText(annotated_frame, f"Progress: {progress:.1f}%",
|
| 1654 |
+
(20, 140), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
|
| 1655 |
+
|
| 1656 |
+
# Add FPS information to the frame
|
| 1657 |
+
cv2.putText(annotated_frame, f"Processing FPS: {processing_fps:.1f}",
|
| 1658 |
+
(20, 170), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
|
| 1659 |
+
|
| 1660 |
+
# Add device information
|
| 1661 |
+
cv2.putText(annotated_frame, f"Device: {DEVICE}",
|
| 1662 |
+
(20, 200), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
|
| 1663 |
+
|
| 1664 |
+
# Add current video time
|
| 1665 |
+
cv2.putText(annotated_frame, f"Time: {minutes:02d}:{seconds:02d}",
|
| 1666 |
+
(20, 230), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
|
| 1667 |
+
|
| 1668 |
+
# Show FPS threshold info for LCI adjustment
|
| 1669 |
+
if fps < 15:
|
| 1670 |
+
cv2.putText(annotated_frame, f"Low FPS video - LCI will be adjusted",
|
| 1671 |
+
(20, 260), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 165, 255), 1)
|
| 1672 |
+
|
| 1673 |
+
# Show frame
|
| 1674 |
+
cv2.imshow("Eye Contact Analysis", annotated_frame)
|
| 1675 |
+
|
| 1676 |
+
# Break if 'q' is pressed
|
| 1677 |
+
if cv2.waitKey(1) & 0xFF == ord('q'):
|
| 1678 |
+
break
|
| 1679 |
+
|
| 1680 |
+
# Clean up
|
| 1681 |
+
cap.release()
|
| 1682 |
+
if display_video:
|
| 1683 |
+
cv2.destroyAllWindows()
|
| 1684 |
+
|
| 1685 |
+
print("\nAnalysis complete!")
|
| 1686 |
+
|
| 1687 |
+
# Get statistics and assessment
|
| 1688 |
+
stats = analyzer.get_stats()
|
| 1689 |
+
assessment = analyzer.get_interview_assessment(model_name=model_name)
|
| 1690 |
+
|
| 1691 |
+
# Combine results
|
| 1692 |
+
results = {
|
| 1693 |
+
"video_info": {
|
| 1694 |
+
"path": video_path,
|
| 1695 |
+
"frames": frame_count,
|
| 1696 |
+
"fps": fps,
|
| 1697 |
+
"duration_seconds": duration,
|
| 1698 |
+
"device_used": DEVICE
|
| 1699 |
+
},
|
| 1700 |
+
"eye_contact_stats": stats,
|
| 1701 |
+
"assessment": assessment
|
| 1702 |
+
}
|
| 1703 |
+
|
| 1704 |
+
from datetime import datetime
|
| 1705 |
+
# Save results if requested
|
| 1706 |
+
if save_results:
|
| 1707 |
+
output_file = f"output_results/{video_path.split('/')[-1].split('.')[0]}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_eye_contact_analysis.json"
|
| 1708 |
+
with open(output_file, 'w') as f:
|
| 1709 |
+
json.dump(results, f, indent=4)
|
| 1710 |
+
print(f"Results saved to {output_file}")
|
| 1711 |
+
|
| 1712 |
+
# Print key statistics
|
| 1713 |
+
print("\n--- Eye Contact Statistics ---")
|
| 1714 |
+
print(f"Total frames analyzed: {stats['total_frames']}")
|
| 1715 |
+
print(f"Eye contact percentage: {stats['eye_contact_percentage']:.2f}%")
|
| 1716 |
+
print(f"Total duration: {stats['total_duration_seconds']:.2f} seconds")
|
| 1717 |
+
print(f"Eye contact duration: {stats['eye_contact_duration_seconds']:.2f} seconds")
|
| 1718 |
+
print(f"Longest eye contact: {stats['longest_eye_contact_seconds']:.2f} seconds")
|
| 1719 |
+
print(f"Average contact duration: {stats['average_contact_duration_seconds']:.2f} seconds")
|
| 1720 |
+
print(f"Contact episodes: {stats['contact_episodes']}")
|
| 1721 |
+
|
| 1722 |
+
# Print LCI with adjustment information for transparency if using low FPS video
|
| 1723 |
+
if fps < 15:
|
| 1724 |
+
print(f"LCI (low FPS - adjusted): {stats['lid_closure_index']:.4f} (raw: {stats['lid_closure_index_raw']:.4f}, adjusted for {fps:.1f} FPS)")
|
| 1725 |
+
else:
|
| 1726 |
+
print(f"Lid closure index: {stats['lid_closure_index']:.4f}")
|
| 1727 |
+
|
| 1728 |
+
print("\n--- Assessment ---")
|
| 1729 |
+
print(f"Score: {assessment['score']}/10")
|
| 1730 |
+
print(f"Assessment: {assessment['assessment']}")
|
| 1731 |
+
print("\nPatterns detected:")
|
| 1732 |
+
for pattern in assessment['patterns']:
|
| 1733 |
+
print(f"- {pattern}")
|
| 1734 |
+
print("\nRecommendations:")
|
| 1735 |
+
for recommendation in assessment['recommendations']:
|
| 1736 |
+
print(f"- {recommendation}")
|
| 1737 |
+
|
| 1738 |
+
return results
|
| 1739 |
+
|
behavior_backend/app/services/processing/processing_service.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import BackgroundTasks
|
| 2 |
+
from sqlalchemy.orm import Session
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
import asyncio
|
| 6 |
+
|
| 7 |
+
from app.db.repositories.video import VideoRepository
|
| 8 |
+
from app.db.repositories.results import ResultsRepository
|
| 9 |
+
from app.models.processing import ProcessingRequest, ProcessingStatus
|
| 10 |
+
from app.services.processing.video_processor import process_video
|
| 11 |
+
from app.core.exceptions import VideoNotFoundError, ResultNotFoundError, VideoProcessingError
|
| 12 |
+
from app.utils.logging_utils import setup_logger
|
| 13 |
+
from app.db.base import SessionLocal
|
| 14 |
+
|
| 15 |
+
# Configure logging
|
| 16 |
+
logger = setup_logger(__name__)
|
| 17 |
+
|
| 18 |
+
class ProcessingService:
|
| 19 |
+
"""Service for video processing operations."""
|
| 20 |
+
|
| 21 |
+
def __init__(self, db: Session):
|
| 22 |
+
self.db = db
|
| 23 |
+
self.video_repo = VideoRepository(db)
|
| 24 |
+
self.results_repo = ResultsRepository(db)
|
| 25 |
+
|
| 26 |
+
async def process_video(self, request: ProcessingRequest, background_tasks: BackgroundTasks) -> ProcessingStatus:
|
| 27 |
+
"""
|
| 28 |
+
Process a video.
|
| 29 |
+
|
| 30 |
+
Args:
|
| 31 |
+
request: Processing request parameters
|
| 32 |
+
background_tasks: FastAPI background tasks
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
ProcessingStatus object
|
| 36 |
+
|
| 37 |
+
Raises:
|
| 38 |
+
VideoNotFoundError: If the video is not found
|
| 39 |
+
VideoProcessingError: If there is an error processing the video
|
| 40 |
+
"""
|
| 41 |
+
video_id = request.video_id
|
| 42 |
+
|
| 43 |
+
# Check if video exists
|
| 44 |
+
db_video = self.video_repo.get_by_id(video_id)
|
| 45 |
+
if not db_video:
|
| 46 |
+
raise VideoNotFoundError(video_id)
|
| 47 |
+
|
| 48 |
+
try:
|
| 49 |
+
# Update status
|
| 50 |
+
self.video_repo.update_status(video_id, "processing")
|
| 51 |
+
|
| 52 |
+
# Get model name from request or use default
|
| 53 |
+
model_name = getattr(request, 'model_name', "gpt-4o")
|
| 54 |
+
|
| 55 |
+
# Start processing in background
|
| 56 |
+
background_tasks.add_task(
|
| 57 |
+
self._process_video_task,
|
| 58 |
+
video_id=video_id,
|
| 59 |
+
video_path=db_video.file_path,
|
| 60 |
+
frame_rate=request.frame_rate,
|
| 61 |
+
backend=request.backend,
|
| 62 |
+
language=request.language,
|
| 63 |
+
generate_annotated_video=request.generate_annotated_video,
|
| 64 |
+
model_name=model_name
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
return ProcessingStatus(
|
| 68 |
+
video_id=video_id,
|
| 69 |
+
status="processing"
|
| 70 |
+
)
|
| 71 |
+
except Exception as e:
|
| 72 |
+
logger.error(f"Error processing video {video_id}: {str(e)}")
|
| 73 |
+
self.video_repo.update_status(video_id, "failed")
|
| 74 |
+
raise VideoProcessingError(f"Error processing video: {str(e)}")
|
| 75 |
+
|
| 76 |
+
def get_processing_status(self, video_id: str) -> ProcessingStatus:
|
| 77 |
+
"""
|
| 78 |
+
Get the processing status of a video.
|
| 79 |
+
|
| 80 |
+
Args:
|
| 81 |
+
video_id: ID of the video
|
| 82 |
+
|
| 83 |
+
Returns:
|
| 84 |
+
ProcessingStatus object
|
| 85 |
+
|
| 86 |
+
Raises:
|
| 87 |
+
VideoNotFoundError: If the video is not found
|
| 88 |
+
"""
|
| 89 |
+
db_video = self.video_repo.get_by_id(video_id)
|
| 90 |
+
if not db_video:
|
| 91 |
+
raise VideoNotFoundError(video_id)
|
| 92 |
+
|
| 93 |
+
return ProcessingStatus(
|
| 94 |
+
video_id=video_id,
|
| 95 |
+
status=db_video.status
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
def get_processing_results(self, video_id: str) -> dict:
|
| 99 |
+
"""
|
| 100 |
+
Get the processing results of a video.
|
| 101 |
+
|
| 102 |
+
Args:
|
| 103 |
+
video_id: ID of the video
|
| 104 |
+
|
| 105 |
+
Returns:
|
| 106 |
+
Dictionary with processing results
|
| 107 |
+
|
| 108 |
+
Raises:
|
| 109 |
+
VideoNotFoundError: If the video is not found
|
| 110 |
+
ResultNotFoundError: If the processing result is not found
|
| 111 |
+
"""
|
| 112 |
+
# Get the video
|
| 113 |
+
video = self.video_repo.get_by_id(video_id)
|
| 114 |
+
if not video:
|
| 115 |
+
raise VideoNotFoundError(video_id)
|
| 116 |
+
|
| 117 |
+
# Get the processing result
|
| 118 |
+
result = self.results_repo.get_by_video_id(video_id)
|
| 119 |
+
if not result:
|
| 120 |
+
return {
|
| 121 |
+
"status": video.status,
|
| 122 |
+
"message": "No processing results available yet"
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
# Prepare response
|
| 126 |
+
response = {
|
| 127 |
+
"status": video.status,
|
| 128 |
+
"processing_date": result.processing_date.isoformat(),
|
| 129 |
+
"transcript": result.transcript,
|
| 130 |
+
"emotion_analysis": result.emotion_analysis,
|
| 131 |
+
"overall_summary": result.overall_summary,
|
| 132 |
+
"transcript_analysis": result.transcript_analysis,
|
| 133 |
+
"recommendations": result.recommendations,
|
| 134 |
+
"body_language_analysis": result.body_language_analysis,
|
| 135 |
+
"body_language_data": result.body_language_data,
|
| 136 |
+
"eye_contact_analysis": result.eye_contact_analysis,
|
| 137 |
+
"eye_contact_data": result.eye_contact_data,
|
| 138 |
+
"face_analysis_data": result.face_analysis_data
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
return response
|
| 142 |
+
|
| 143 |
+
async def _process_video_task(
|
| 144 |
+
self,
|
| 145 |
+
video_id: str,
|
| 146 |
+
video_path: str,
|
| 147 |
+
frame_rate: int,
|
| 148 |
+
backend: str,
|
| 149 |
+
language: str,
|
| 150 |
+
generate_annotated_video: bool,
|
| 151 |
+
model_name: str = "gpt-4o"
|
| 152 |
+
):
|
| 153 |
+
"""
|
| 154 |
+
Background task to process a video.
|
| 155 |
+
|
| 156 |
+
Args:
|
| 157 |
+
video_id: ID of the video
|
| 158 |
+
video_path: Path to the video file
|
| 159 |
+
frame_rate: Frame rate for processing
|
| 160 |
+
backend: Backend for face detection
|
| 161 |
+
language: Language of the video
|
| 162 |
+
generate_annotated_video: Whether to generate an annotated video
|
| 163 |
+
model_name: The name of the model to use for AI analysis (default: gpt-4o)
|
| 164 |
+
"""
|
| 165 |
+
try:
|
| 166 |
+
# Force mediapipe backend for best GPU performance on Mac M3
|
| 167 |
+
if backend == 'opencv' or not backend or backend == "retinaface":
|
| 168 |
+
logger.info(f"Backend '{backend}' doesn't support GPU acceleration or is not recommended.")
|
| 169 |
+
logger.info(f"Switching to 'mediapipe' for GPU-accelerated frame analysis.")
|
| 170 |
+
backend = "mediapipe"
|
| 171 |
+
|
| 172 |
+
# Ensure we're using a GPU-compatible backend
|
| 173 |
+
if backend not in ['mediapipe', 'ssd', 'mtcnn']:
|
| 174 |
+
logger.info(f"Backend '{backend}' may not support GPU acceleration.")
|
| 175 |
+
logger.info(f"Consider using 'mediapipe' for best GPU performance on Mac M3.")
|
| 176 |
+
|
| 177 |
+
logger.info(f"Starting video processing task for {video_id}")
|
| 178 |
+
logger.info(f"Video path: {video_path}")
|
| 179 |
+
logger.info(f"Frame rate: {frame_rate}")
|
| 180 |
+
logger.info(f"Backend: {backend}")
|
| 181 |
+
logger.info(f"Language: {language}")
|
| 182 |
+
logger.info(f"Generate annotated video: {generate_annotated_video}")
|
| 183 |
+
logger.info(f"Model name for analysis: {model_name}")
|
| 184 |
+
|
| 185 |
+
logger.info(f"Offloading video processing for {video_id} to a separate thread.")
|
| 186 |
+
# Process the video in a separate thread to avoid blocking the event loop
|
| 187 |
+
transcript, analysis = await asyncio.to_thread(
|
| 188 |
+
process_video, # The synchronous, CPU-bound function
|
| 189 |
+
video_path=video_path,
|
| 190 |
+
frame_rate=frame_rate,
|
| 191 |
+
backend=backend,
|
| 192 |
+
language=language,
|
| 193 |
+
generate_annotated_video=generate_annotated_video,
|
| 194 |
+
video_id=video_id,
|
| 195 |
+
status_callback=lambda progress: self._update_progress(video_id, progress),
|
| 196 |
+
model_name=model_name
|
| 197 |
+
)
|
| 198 |
+
logger.info(f"Threaded video processing for {video_id} completed.")
|
| 199 |
+
|
| 200 |
+
# Parse the comprehensive analysis
|
| 201 |
+
try:
|
| 202 |
+
analysis_data = json.loads(analysis) if analysis else {}
|
| 203 |
+
logger.info(f"Received analysis data: {analysis_data}")
|
| 204 |
+
|
| 205 |
+
# Log the keys for debugging
|
| 206 |
+
logger.info(f"Keys in analysis_data: {list(analysis_data.keys())}")
|
| 207 |
+
|
| 208 |
+
# Extract data from the comprehensive analysis
|
| 209 |
+
emotion_analysis = analysis_data.get("Emotion Analysis", {})
|
| 210 |
+
overall_summary = analysis_data.get("Overall Summary", "")
|
| 211 |
+
transcript_analysis = analysis_data.get("Transcript Analysis", {})
|
| 212 |
+
recommendations = analysis_data.get("Recommendations", {})
|
| 213 |
+
body_language_analysis = analysis_data.get("Body Language Analysis", {})
|
| 214 |
+
eye_contact_analysis = analysis_data.get("Eye Contact Analysis", {})
|
| 215 |
+
|
| 216 |
+
# Try both capitalized and non-capitalized versions since the format may vary
|
| 217 |
+
eye_contact_data = analysis_data.get("eye_contact_analysis", {})
|
| 218 |
+
body_language_data = analysis_data.get("body_language_analysis", {})
|
| 219 |
+
face_analysis_data = analysis_data.get("face_analysis", {})
|
| 220 |
+
|
| 221 |
+
# Check if data exists under any key - key names might be inconsistent
|
| 222 |
+
if not eye_contact_data and "eye_contact_analysis" in str(analysis_data).lower():
|
| 223 |
+
logger.info(f"Searching for eye_contact_data in analysis_data string representation")
|
| 224 |
+
for key in analysis_data.keys():
|
| 225 |
+
if "eye" in key.lower() and "contact" in key.lower():
|
| 226 |
+
logger.info(f"Found potential eye contact key: {key}")
|
| 227 |
+
eye_contact_data = analysis_data.get(key, {})
|
| 228 |
+
break
|
| 229 |
+
|
| 230 |
+
if not body_language_data and "body_language_analysis" in str(analysis_data).lower():
|
| 231 |
+
logger.info(f"Searching for body_language_data in analysis_data string representation")
|
| 232 |
+
for key in analysis_data.keys():
|
| 233 |
+
if "body" in key.lower() and "language" in key.lower():
|
| 234 |
+
logger.info(f"Found potential body language key: {key}")
|
| 235 |
+
body_language_data = analysis_data.get(key, {})
|
| 236 |
+
break
|
| 237 |
+
|
| 238 |
+
if not face_analysis_data and "face_analysis" in str(analysis_data).lower():
|
| 239 |
+
logger.info(f"Searching for face_analysis_data in analysis_data string representation")
|
| 240 |
+
for key in analysis_data.keys():
|
| 241 |
+
if "face" in key.lower() and "analysis" in key.lower():
|
| 242 |
+
logger.info(f"Found potential face analysis key: {key}")
|
| 243 |
+
face_analysis_data = analysis_data.get(key, {})
|
| 244 |
+
break
|
| 245 |
+
|
| 246 |
+
logger.info(f"Parsed analysis data: {emotion_analysis, overall_summary, transcript_analysis, recommendations, body_language_analysis}")
|
| 247 |
+
logger.info(f"Parsed eye contact data: {eye_contact_data}")
|
| 248 |
+
logger.info(f"Parsed body language data: {body_language_data}")
|
| 249 |
+
logger.info(f"Parsed face analysis data: {face_analysis_data}")
|
| 250 |
+
except Exception as e:
|
| 251 |
+
logger.error(f"Error parsing analysis JSON: {e}")
|
| 252 |
+
emotion_analysis = {}
|
| 253 |
+
overall_summary = ""
|
| 254 |
+
transcript_analysis = {}
|
| 255 |
+
recommendations = {}
|
| 256 |
+
body_language_data = {}
|
| 257 |
+
body_language_analysis = {}
|
| 258 |
+
eye_contact_data = {}
|
| 259 |
+
eye_contact_analysis = {}
|
| 260 |
+
face_analysis_data = {}
|
| 261 |
+
# Save results to database
|
| 262 |
+
self.results_repo.create(
|
| 263 |
+
video_id=video_id,
|
| 264 |
+
transcript=transcript or "",
|
| 265 |
+
emotion_analysis=emotion_analysis,
|
| 266 |
+
overall_summary=overall_summary,
|
| 267 |
+
transcript_analysis=transcript_analysis,
|
| 268 |
+
recommendations=recommendations,
|
| 269 |
+
body_language_analysis=body_language_analysis,
|
| 270 |
+
body_language_data=body_language_data,
|
| 271 |
+
eye_contact_analysis=eye_contact_analysis,
|
| 272 |
+
eye_contact_data=eye_contact_data,
|
| 273 |
+
face_analysis_data=face_analysis_data
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
+
# Update video status
|
| 277 |
+
self.video_repo.update_status(video_id, "completed")
|
| 278 |
+
logger.info(f"Video {video_id} processing completed successfully")
|
| 279 |
+
|
| 280 |
+
except Exception as e:
|
| 281 |
+
# Update status on error
|
| 282 |
+
logger.error(f"Error processing video {video_id}: {str(e)}")
|
| 283 |
+
self.video_repo.update_status(video_id, "failed")
|
| 284 |
+
|
| 285 |
+
def _update_progress(self, video_id: str, progress: float):
|
| 286 |
+
"""
|
| 287 |
+
Update the processing progress of a video.
|
| 288 |
+
|
| 289 |
+
Args:
|
| 290 |
+
video_id: ID of the video
|
| 291 |
+
progress: Processing progress (0-100)
|
| 292 |
+
"""
|
| 293 |
+
self.video_repo.update_progress(video_id, progress)
|
behavior_backend/app/services/processing/speech_service.py
ADDED
|
@@ -0,0 +1,530 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import whisper
|
| 3 |
+
import speech_recognition as sr
|
| 4 |
+
import logging
|
| 5 |
+
import backoff
|
| 6 |
+
import subprocess
|
| 7 |
+
import time
|
| 8 |
+
import torch
|
| 9 |
+
import json
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from pydub import AudioSegment
|
| 12 |
+
from moviepy.editor import VideoFileClip
|
| 13 |
+
from typing import Optional, List, Dict, Any
|
| 14 |
+
|
| 15 |
+
# Fix import paths
|
| 16 |
+
try:
|
| 17 |
+
from app.utils.device_utils import device, run_on_device
|
| 18 |
+
from app.utils.logging_utils import time_it, setup_logger
|
| 19 |
+
except ImportError:
|
| 20 |
+
# Try relative imports for running from project root
|
| 21 |
+
from behavior_backend.app.utils.device_utils import device, run_on_device
|
| 22 |
+
from behavior_backend.app.utils.logging_utils import time_it, setup_logger
|
| 23 |
+
|
| 24 |
+
# Configure logging
|
| 25 |
+
logger = setup_logger(__name__)
|
| 26 |
+
|
| 27 |
+
class TranscriptionService:
|
| 28 |
+
"""Service for cloud-based speech-to-text operations."""
|
| 29 |
+
|
| 30 |
+
def __init__(self):
|
| 31 |
+
"""Initialize the transcription service."""
|
| 32 |
+
self.recognizer = sr.Recognizer()
|
| 33 |
+
|
| 34 |
+
# Load credentials
|
| 35 |
+
self.credentials = self._load_credentials()
|
| 36 |
+
|
| 37 |
+
# Define available recognizers
|
| 38 |
+
self.available_recognizers = {}
|
| 39 |
+
|
| 40 |
+
# Check which recognizers are available and add them
|
| 41 |
+
if hasattr(self.recognizer, 'recognize_openai_whisper') or hasattr(self.recognizer, 'recognize_whisper_api'):
|
| 42 |
+
self.available_recognizers['openai_whisper'] = self._transcribe_openai_whisper
|
| 43 |
+
|
| 44 |
+
if hasattr(self.recognizer, 'recognize_google_cloud'):
|
| 45 |
+
self.available_recognizers['google_cloud'] = self._transcribe_google_cloud
|
| 46 |
+
|
| 47 |
+
if hasattr(self.recognizer, 'recognize_groq'):
|
| 48 |
+
self.available_recognizers['groq'] = self._transcribe_groq
|
| 49 |
+
|
| 50 |
+
logger.info(f"Available cloud transcription services: {', '.join(self.available_recognizers.keys())}")
|
| 51 |
+
|
| 52 |
+
def _load_credentials(self):
|
| 53 |
+
"""Load all service credentials"""
|
| 54 |
+
creds = {}
|
| 55 |
+
try:
|
| 56 |
+
# Google Cloud - check for credentials file in the project directory
|
| 57 |
+
google_creds_path = os.path.join(os.path.dirname(__file__), "google_credentials.json")
|
| 58 |
+
if os.path.exists(google_creds_path):
|
| 59 |
+
creds['google_cloud'] = google_creds_path
|
| 60 |
+
else:
|
| 61 |
+
# Try environment variable
|
| 62 |
+
creds['google_cloud'] = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
|
| 63 |
+
|
| 64 |
+
# Groq API key
|
| 65 |
+
creds['groq'] = os.getenv('GROQ_API_KEY')
|
| 66 |
+
print('---------------------------------------------- ')
|
| 67 |
+
print( "Groq API key:",creds['groq'])
|
| 68 |
+
print('----------------------------------------------')
|
| 69 |
+
# OpenAI API key
|
| 70 |
+
creds['openai'] = os.getenv('OPENAI_API_KEY')
|
| 71 |
+
print('----------------------------------------------')
|
| 72 |
+
print( "OpenAI API key:",creds['openai'])
|
| 73 |
+
print('----------------------------------------------')
|
| 74 |
+
except Exception as e:
|
| 75 |
+
logger.error(f"Error loading credentials: {e}")
|
| 76 |
+
|
| 77 |
+
return creds
|
| 78 |
+
|
| 79 |
+
def convert_to_wav(self, input_path):
|
| 80 |
+
"""Convert audio/video file to WAV format if needed"""
|
| 81 |
+
input_path = Path(input_path)
|
| 82 |
+
|
| 83 |
+
if input_path.suffix.lower() == '.wav':
|
| 84 |
+
return str(input_path)
|
| 85 |
+
|
| 86 |
+
output_path = input_path.with_suffix('.wav')
|
| 87 |
+
logger.info(f"Converting {input_path} to WAV format")
|
| 88 |
+
|
| 89 |
+
try:
|
| 90 |
+
audio = AudioSegment.from_file(str(input_path))
|
| 91 |
+
audio.export(str(output_path), format="wav")
|
| 92 |
+
logger.info(f"Conversion completed: {output_path}")
|
| 93 |
+
return str(output_path)
|
| 94 |
+
except Exception as e:
|
| 95 |
+
logger.error(f"Error converting file: {e}")
|
| 96 |
+
raise
|
| 97 |
+
|
| 98 |
+
@backoff.on_exception(
|
| 99 |
+
backoff.expo,
|
| 100 |
+
Exception,
|
| 101 |
+
max_tries=3
|
| 102 |
+
)
|
| 103 |
+
def transcribe(self, audio_file_path, services=None, cleanup=True, language='en'):
|
| 104 |
+
"""
|
| 105 |
+
Transcribe audio using multiple services
|
| 106 |
+
|
| 107 |
+
Args:
|
| 108 |
+
audio_file_path: Path to the audio file
|
| 109 |
+
services: List of services to use for transcription
|
| 110 |
+
cleanup: Whether to clean up temporary files
|
| 111 |
+
language: Language code
|
| 112 |
+
|
| 113 |
+
Returns:
|
| 114 |
+
Dictionary of transcription results by service
|
| 115 |
+
"""
|
| 116 |
+
if services is None:
|
| 117 |
+
services = list(self.available_recognizers.keys())
|
| 118 |
+
|
| 119 |
+
results = {}
|
| 120 |
+
original_path = Path(audio_file_path)
|
| 121 |
+
|
| 122 |
+
try:
|
| 123 |
+
wav_path = self.convert_to_wav(audio_file_path)
|
| 124 |
+
|
| 125 |
+
with sr.AudioFile(wav_path) as source:
|
| 126 |
+
audio = self.recognizer.record(source)
|
| 127 |
+
|
| 128 |
+
# Try each requested service
|
| 129 |
+
for service in services:
|
| 130 |
+
if service in self.available_recognizers:
|
| 131 |
+
try:
|
| 132 |
+
logger.info(f"Starting transcription with {service}")
|
| 133 |
+
text = self.available_recognizers[service](audio, language)
|
| 134 |
+
if text:
|
| 135 |
+
results[service] = text
|
| 136 |
+
logger.info(f"{service} transcription completed")
|
| 137 |
+
except Exception as e:
|
| 138 |
+
logger.error(f"{service} transcription failed: {e}")
|
| 139 |
+
results[service] = f"Error: {str(e)}"
|
| 140 |
+
|
| 141 |
+
if cleanup and original_path.suffix.lower() != '.wav' and wav_path != str(original_path):
|
| 142 |
+
os.remove(wav_path)
|
| 143 |
+
logger.info("Cleaned up converted file")
|
| 144 |
+
|
| 145 |
+
return results
|
| 146 |
+
|
| 147 |
+
except Exception as e:
|
| 148 |
+
logger.error(f"Transcription process failed: {e}")
|
| 149 |
+
raise
|
| 150 |
+
|
| 151 |
+
# Individual transcription methods
|
| 152 |
+
def _transcribe_openai_whisper(self, audio, language):
|
| 153 |
+
"""Transcribe using OpenAI Whisper API"""
|
| 154 |
+
if not self.credentials.get('openai'):
|
| 155 |
+
raise ValueError("OpenAI API key not found")
|
| 156 |
+
|
| 157 |
+
# Convert language code if needed (e.g., 'en-US' to 'en')
|
| 158 |
+
whisper_lang = language.split('-')[0] if '-' in language else language
|
| 159 |
+
|
| 160 |
+
# Try both method names that might be available
|
| 161 |
+
if hasattr(self.recognizer, 'recognize_whisper_api'):
|
| 162 |
+
return self.recognizer.recognize_whisper_api(
|
| 163 |
+
audio,
|
| 164 |
+
api_key=self.credentials['openai'],
|
| 165 |
+
language=whisper_lang
|
| 166 |
+
)
|
| 167 |
+
elif hasattr(self.recognizer, 'recognize_openai_whisper'):
|
| 168 |
+
return self.recognizer.recognize_openai_whisper(
|
| 169 |
+
audio,
|
| 170 |
+
api_key=self.credentials['openai'],
|
| 171 |
+
language=whisper_lang
|
| 172 |
+
)
|
| 173 |
+
else:
|
| 174 |
+
raise NotImplementedError("No OpenAI Whisper API recognition method available")
|
| 175 |
+
|
| 176 |
+
def _transcribe_google_cloud(self, audio, language):
|
| 177 |
+
"""Transcribe using Google Cloud Speech-to-Text"""
|
| 178 |
+
if not self.credentials.get('google_cloud'):
|
| 179 |
+
raise ValueError("Google Cloud credentials not found")
|
| 180 |
+
|
| 181 |
+
return self.recognizer.recognize_google_cloud(
|
| 182 |
+
audio,
|
| 183 |
+
credentials_json=self.credentials['google_cloud'],
|
| 184 |
+
language=language
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
+
def _transcribe_groq(self, audio, language):
|
| 188 |
+
"""Transcribe using Groq API"""
|
| 189 |
+
if not self.credentials.get('groq'):
|
| 190 |
+
raise ValueError("Groq API key not found")
|
| 191 |
+
return self.recognizer.recognize_groq(audio)
|
| 192 |
+
|
| 193 |
+
class SpeechService:
|
| 194 |
+
"""Service for speech-to-text operations."""
|
| 195 |
+
|
| 196 |
+
def __init__(self):
|
| 197 |
+
"""Initialize the speech service."""
|
| 198 |
+
self.whisper_model = None
|
| 199 |
+
self.ffmpeg_success = False
|
| 200 |
+
self.cloud_transcription_service = TranscriptionService()
|
| 201 |
+
|
| 202 |
+
@time_it
|
| 203 |
+
def extract_audio(self, video_path: str) -> str:
|
| 204 |
+
"""
|
| 205 |
+
Extract audio from a video file using FFmpeg (primary) or MoviePy (fallback).
|
| 206 |
+
|
| 207 |
+
Args:
|
| 208 |
+
video_path: Path to the video file
|
| 209 |
+
|
| 210 |
+
Returns:
|
| 211 |
+
Path to the extracted audio file
|
| 212 |
+
"""
|
| 213 |
+
logger.info(f"Extracting audio from {video_path}")
|
| 214 |
+
|
| 215 |
+
# Create output path
|
| 216 |
+
video_filename = Path(video_path).stem
|
| 217 |
+
audio_path = f"temp_{video_filename}.wav"
|
| 218 |
+
|
| 219 |
+
# Try FFmpeg approach first
|
| 220 |
+
self.ffmpeg_success = False
|
| 221 |
+
ffmpeg_start_time = time.time()
|
| 222 |
+
|
| 223 |
+
try:
|
| 224 |
+
logger.info("Attempting audio extraction with FFmpeg...")
|
| 225 |
+
result = subprocess.run([
|
| 226 |
+
'ffmpeg',
|
| 227 |
+
'-i', str(video_path),
|
| 228 |
+
'-acodec', 'pcm_s16le',
|
| 229 |
+
'-ar', '16000', # 16kHz sample rate
|
| 230 |
+
'-ac', '1', # Mono channel
|
| 231 |
+
'-y', # Overwrite output file if it exists
|
| 232 |
+
str(audio_path)
|
| 233 |
+
], check=True, capture_output=True, text=True)
|
| 234 |
+
|
| 235 |
+
self.ffmpeg_success = True
|
| 236 |
+
ffmpeg_end_time = time.time()
|
| 237 |
+
ffmpeg_duration = ffmpeg_end_time - ffmpeg_start_time
|
| 238 |
+
logger.info(f"FFmpeg audio extraction successful in {ffmpeg_duration:.4f} seconds")
|
| 239 |
+
|
| 240 |
+
except (subprocess.CalledProcessError, FileNotFoundError) as e:
|
| 241 |
+
ffmpeg_end_time = time.time()
|
| 242 |
+
ffmpeg_duration = ffmpeg_end_time - ffmpeg_start_time
|
| 243 |
+
logger.warning(f"FFmpeg audio extraction failed after {ffmpeg_duration:.4f} seconds: {str(e)}")
|
| 244 |
+
logger.warning("Falling back to MoviePy for audio extraction...")
|
| 245 |
+
|
| 246 |
+
# Fallback to MoviePy approach
|
| 247 |
+
moviepy_start_time = time.time()
|
| 248 |
+
try:
|
| 249 |
+
# Extract audio using moviepy
|
| 250 |
+
video = VideoFileClip(video_path)
|
| 251 |
+
video.audio.write_audiofile(audio_path, codec='pcm_s16le', logger=None)
|
| 252 |
+
video.close() # Explicitly close to free resources
|
| 253 |
+
|
| 254 |
+
moviepy_end_time = time.time()
|
| 255 |
+
moviepy_duration = moviepy_end_time - moviepy_start_time
|
| 256 |
+
logger.info(f"MoviePy audio extraction successful in {moviepy_duration:.4f} seconds")
|
| 257 |
+
|
| 258 |
+
except Exception as e:
|
| 259 |
+
moviepy_end_time = time.time()
|
| 260 |
+
moviepy_duration = moviepy_end_time - moviepy_start_time
|
| 261 |
+
logger.error(f"MoviePy audio extraction also failed after {moviepy_duration:.4f} seconds: {str(e)}")
|
| 262 |
+
raise RuntimeError(f"Failed to extract audio from video using both FFmpeg and MoviePy: {str(e)}")
|
| 263 |
+
|
| 264 |
+
# Verify the audio file exists and has content
|
| 265 |
+
audio_file = Path(audio_path)
|
| 266 |
+
if not audio_file.exists() or audio_file.stat().st_size == 0:
|
| 267 |
+
logger.error(f"Audio extraction produced empty or missing file: {audio_path}")
|
| 268 |
+
raise RuntimeError(f"Audio extraction failed: output file {audio_path} is empty or missing")
|
| 269 |
+
|
| 270 |
+
logger.info(f"Audio extracted to {audio_path}")
|
| 271 |
+
|
| 272 |
+
# Log performance comparison if both methods were used
|
| 273 |
+
if not self.ffmpeg_success:
|
| 274 |
+
logger.info(f"Audio extraction performance comparison - FFmpeg: {ffmpeg_duration:.4f}s, MoviePy: {moviepy_duration:.4f}s")
|
| 275 |
+
|
| 276 |
+
return audio_path
|
| 277 |
+
|
| 278 |
+
@time_it
|
| 279 |
+
def split_audio(self, audio_path: str, chunk_length_ms: int = 30000) -> List[str]:
|
| 280 |
+
"""
|
| 281 |
+
Split audio file into chunks for processing.
|
| 282 |
+
|
| 283 |
+
Args:
|
| 284 |
+
audio_path: Path to the audio file
|
| 285 |
+
chunk_length_ms: Length of each chunk in milliseconds
|
| 286 |
+
|
| 287 |
+
Returns:
|
| 288 |
+
List of paths to audio chunks
|
| 289 |
+
"""
|
| 290 |
+
logger.info(f"Splitting audio {audio_path} into {chunk_length_ms}ms chunks")
|
| 291 |
+
|
| 292 |
+
# Load audio
|
| 293 |
+
audio = AudioSegment.from_file(audio_path)
|
| 294 |
+
|
| 295 |
+
# Create directory for chunks
|
| 296 |
+
chunks_dir = Path("temp_chunks")
|
| 297 |
+
chunks_dir.mkdir(exist_ok=True)
|
| 298 |
+
|
| 299 |
+
# Split audio into chunks
|
| 300 |
+
chunk_paths = []
|
| 301 |
+
for i, chunk_start in enumerate(range(0, len(audio), chunk_length_ms)):
|
| 302 |
+
chunk_end = min(chunk_start + chunk_length_ms, len(audio))
|
| 303 |
+
chunk = audio[chunk_start:chunk_end]
|
| 304 |
+
|
| 305 |
+
chunk_path = chunks_dir / f"chunk_{i}.wav"
|
| 306 |
+
chunk.export(chunk_path, format="wav")
|
| 307 |
+
chunk_paths.append(str(chunk_path))
|
| 308 |
+
|
| 309 |
+
logger.info(f"Split audio into {len(chunk_paths)} chunks")
|
| 310 |
+
return chunk_paths
|
| 311 |
+
|
| 312 |
+
@run_on_device
|
| 313 |
+
@time_it
|
| 314 |
+
def transcribe_with_whisper(self, audio_path: str, language: str = 'en', device: str = 'cpu') -> str:
|
| 315 |
+
"""
|
| 316 |
+
Transcribe audio using Whisper.
|
| 317 |
+
|
| 318 |
+
Args:
|
| 319 |
+
audio_path: Path to the audio file
|
| 320 |
+
language: Language code
|
| 321 |
+
device: Device to use for processing
|
| 322 |
+
|
| 323 |
+
Returns:
|
| 324 |
+
Transcribed text
|
| 325 |
+
"""
|
| 326 |
+
logger.info(f"Transcribing {audio_path} with Whisper on {device}")
|
| 327 |
+
|
| 328 |
+
try:
|
| 329 |
+
# Load model if not already loaded or if device has changed
|
| 330 |
+
if self.whisper_model is None or getattr(self, '_current_device', None) != device:
|
| 331 |
+
# Clear existing model if it exists to free memory
|
| 332 |
+
if self.whisper_model is not None:
|
| 333 |
+
del self.whisper_model
|
| 334 |
+
import gc
|
| 335 |
+
gc.collect()
|
| 336 |
+
torch.cuda.empty_cache() if device == 'cuda' else None
|
| 337 |
+
|
| 338 |
+
logger.info(f"Loading Whisper model on {device}")
|
| 339 |
+
# Use tiny model instead of base to reduce memory usage
|
| 340 |
+
self.whisper_model = whisper.load_model("tiny", device=device)
|
| 341 |
+
self._current_device = device
|
| 342 |
+
|
| 343 |
+
# Convert language code if needed (e.g., 'en-US' to 'en')
|
| 344 |
+
if '-' in language:
|
| 345 |
+
language = language.split('-')[0]
|
| 346 |
+
|
| 347 |
+
# Transcribe audio with reduced compute settings
|
| 348 |
+
result = self.whisper_model.transcribe(
|
| 349 |
+
audio_path,
|
| 350 |
+
language=language,
|
| 351 |
+
fp16=(device == 'cuda'), # Use fp16 only on CUDA
|
| 352 |
+
beam_size=3, # Reduce beam size (default is 5)
|
| 353 |
+
best_of=1 # Reduce number of candidates (default is 5)
|
| 354 |
+
)
|
| 355 |
+
|
| 356 |
+
return result["text"]
|
| 357 |
+
finally:
|
| 358 |
+
# Force garbage collection after transcription to free memory
|
| 359 |
+
import gc
|
| 360 |
+
gc.collect()
|
| 361 |
+
torch.cuda.empty_cache() if device == 'cuda' else None
|
| 362 |
+
|
| 363 |
+
@backoff.on_exception(
|
| 364 |
+
backoff.expo,
|
| 365 |
+
Exception,
|
| 366 |
+
max_tries=3
|
| 367 |
+
)
|
| 368 |
+
@time_it
|
| 369 |
+
def transcribe_audio(self, audio_path: str, language: str = 'en', service: str = 'whisper') -> str:
|
| 370 |
+
"""
|
| 371 |
+
Transcribe audio file to text.
|
| 372 |
+
|
| 373 |
+
Args:
|
| 374 |
+
audio_path: Path to the audio file
|
| 375 |
+
language: Language code
|
| 376 |
+
service: Transcription service to use ('whisper', 'groq', 'google_cloud', 'openai_whisper')
|
| 377 |
+
|
| 378 |
+
Returns:
|
| 379 |
+
Transcribed text
|
| 380 |
+
"""
|
| 381 |
+
logger.info(f"Starting transcription of {audio_path} using {service}")
|
| 382 |
+
|
| 383 |
+
# For cloud-based transcription services
|
| 384 |
+
if service in ['groq', 'google_cloud', 'openai_whisper']:
|
| 385 |
+
# Check if the requested service is available
|
| 386 |
+
if service not in self.cloud_transcription_service.available_recognizers:
|
| 387 |
+
logger.warning(f"Requested service {service} is not available, falling back to whisper")
|
| 388 |
+
service = 'whisper'
|
| 389 |
+
|
| 390 |
+
# Continue with the existing implementation
|
| 391 |
+
if service in ['groq', 'google_cloud', 'openai_whisper']:
|
| 392 |
+
logger.info(f"Using cloud-based transcription with {service}")
|
| 393 |
+
|
| 394 |
+
# For long audio files, split into chunks and transcribe each chunk
|
| 395 |
+
if os.path.getsize(audio_path) > 10 * 1024 * 1024: # 10 MB
|
| 396 |
+
logger.info(f"Audio file is large, splitting into chunks")
|
| 397 |
+
chunk_paths = self.split_audio(audio_path)
|
| 398 |
+
|
| 399 |
+
# Transcribe each chunk
|
| 400 |
+
transcripts = []
|
| 401 |
+
for chunk_path in chunk_paths:
|
| 402 |
+
# Transcribe with cloud service
|
| 403 |
+
results = self.cloud_transcription_service.transcribe(
|
| 404 |
+
chunk_path,
|
| 405 |
+
services=[service],
|
| 406 |
+
language=language
|
| 407 |
+
)
|
| 408 |
+
|
| 409 |
+
# Get the result for the requested service
|
| 410 |
+
if service in results and results[service] and not results[service].startswith('Error:'):
|
| 411 |
+
transcripts.append(results[service])
|
| 412 |
+
else:
|
| 413 |
+
logger.warning(f"Failed to transcribe chunk with {service}, falling back to whisper")
|
| 414 |
+
transcript = self.transcribe_with_whisper(chunk_path, language)
|
| 415 |
+
transcripts.append(transcript)
|
| 416 |
+
|
| 417 |
+
# Combine transcripts
|
| 418 |
+
full_transcript = " ".join(transcripts)
|
| 419 |
+
|
| 420 |
+
# Clean up chunks
|
| 421 |
+
for chunk_path in chunk_paths:
|
| 422 |
+
os.remove(chunk_path)
|
| 423 |
+
|
| 424 |
+
return full_transcript
|
| 425 |
+
else:
|
| 426 |
+
# Transcribe directly with cloud service
|
| 427 |
+
results = self.cloud_transcription_service.transcribe(
|
| 428 |
+
audio_path,
|
| 429 |
+
services=[service],
|
| 430 |
+
language=language
|
| 431 |
+
)
|
| 432 |
+
|
| 433 |
+
# Get the result for the requested service
|
| 434 |
+
if service in results and results[service] and not results[service].startswith('Error:'):
|
| 435 |
+
return results[service]
|
| 436 |
+
else:
|
| 437 |
+
logger.warning(f"Failed to transcribe with {service}, falling back to whisper")
|
| 438 |
+
return self.transcribe_with_whisper(audio_path, language)
|
| 439 |
+
|
| 440 |
+
# For local whisper transcription (default)
|
| 441 |
+
else:
|
| 442 |
+
# For long audio files, split into chunks and transcribe each chunk
|
| 443 |
+
if os.path.getsize(audio_path) > 10 * 1024 * 1024: # 10 MB
|
| 444 |
+
logger.info(f"Audio file is large, splitting into chunks")
|
| 445 |
+
chunk_paths = self.split_audio(audio_path)
|
| 446 |
+
|
| 447 |
+
# Transcribe each chunk
|
| 448 |
+
transcripts = []
|
| 449 |
+
for chunk_path in chunk_paths:
|
| 450 |
+
transcript = self.transcribe_with_whisper(chunk_path, language)
|
| 451 |
+
transcripts.append(transcript)
|
| 452 |
+
|
| 453 |
+
# Combine transcripts
|
| 454 |
+
full_transcript = " ".join(transcripts)
|
| 455 |
+
|
| 456 |
+
# Clean up chunks
|
| 457 |
+
for chunk_path in chunk_paths:
|
| 458 |
+
os.remove(chunk_path)
|
| 459 |
+
|
| 460 |
+
return full_transcript
|
| 461 |
+
else:
|
| 462 |
+
# Transcribe directly
|
| 463 |
+
return self.transcribe_with_whisper(audio_path, language)
|
| 464 |
+
|
| 465 |
+
@time_it
|
| 466 |
+
def process_video_speech(self, video_path: str, language: str = 'en', service: str = 'whisper') -> str:
|
| 467 |
+
"""
|
| 468 |
+
Process speech in a video file.
|
| 469 |
+
|
| 470 |
+
Args:
|
| 471 |
+
video_path: Path to the video file
|
| 472 |
+
language: Language code
|
| 473 |
+
service: Transcription service to use ('whisper', 'groq', 'google_cloud', 'openai_whisper')
|
| 474 |
+
If 'whisper' is selected, local Whisper model will be used.
|
| 475 |
+
If 'groq', 'google_cloud', or 'openai_whisper' are selected, cloud-based transcription will be used.
|
| 476 |
+
If the requested service is not available, it will fall back to 'whisper'.
|
| 477 |
+
|
| 478 |
+
Returns:
|
| 479 |
+
Transcribed text
|
| 480 |
+
"""
|
| 481 |
+
audio_path = None
|
| 482 |
+
extraction_method = None
|
| 483 |
+
|
| 484 |
+
# Check if the requested service is available
|
| 485 |
+
if service != 'whisper' and service not in self.cloud_transcription_service.available_recognizers:
|
| 486 |
+
logger.warning(f"Requested service {service} is not available, falling back to whisper")
|
| 487 |
+
service = 'whisper'
|
| 488 |
+
|
| 489 |
+
try:
|
| 490 |
+
# Extract audio
|
| 491 |
+
start_time = time.time()
|
| 492 |
+
audio_path = self.extract_audio(video_path)
|
| 493 |
+
extraction_time = time.time() - start_time
|
| 494 |
+
|
| 495 |
+
# Determine which method was used (for logging)
|
| 496 |
+
if self.ffmpeg_success:
|
| 497 |
+
extraction_method = "FFmpeg"
|
| 498 |
+
else:
|
| 499 |
+
extraction_method = "MoviePy"
|
| 500 |
+
|
| 501 |
+
logger.info(f"Audio extracted using {extraction_method} in {extraction_time:.4f} seconds")
|
| 502 |
+
|
| 503 |
+
# Transcribe audio
|
| 504 |
+
start_time = time.time()
|
| 505 |
+
transcript = self.transcribe_audio(audio_path, language, service)
|
| 506 |
+
transcription_time = time.time() - start_time
|
| 507 |
+
|
| 508 |
+
logger.info(f"Audio transcribed in {transcription_time:.4f} seconds")
|
| 509 |
+
logger.info(f"Total speech processing time: {extraction_time + transcription_time:.4f} seconds")
|
| 510 |
+
|
| 511 |
+
return transcript
|
| 512 |
+
|
| 513 |
+
except Exception as e:
|
| 514 |
+
logger.error(f"Error in process_video_speech: {str(e)}")
|
| 515 |
+
raise
|
| 516 |
+
|
| 517 |
+
finally:
|
| 518 |
+
# Clean up
|
| 519 |
+
if audio_path and os.path.exists(audio_path):
|
| 520 |
+
try:
|
| 521 |
+
os.remove(audio_path)
|
| 522 |
+
logger.info(f"Temporary audio file {audio_path} removed")
|
| 523 |
+
except Exception as e:
|
| 524 |
+
logger.warning(f"Failed to remove temporary audio file {audio_path}: {str(e)}")
|
| 525 |
+
|
| 526 |
+
# Force garbage collection
|
| 527 |
+
import gc
|
| 528 |
+
gc.collect()
|
| 529 |
+
if torch.cuda.is_available():
|
| 530 |
+
torch.cuda.empty_cache()
|
behavior_backend/app/services/processing/temp/video_processor.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import time
|
| 3 |
+
import json
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Dict, Any, Optional, Tuple
|
| 7 |
+
|
| 8 |
+
from app.core.config import settings
|
| 9 |
+
|
| 10 |
+
from app.utils.logging_utils import time_it
|
| 11 |
+
from app.utils.data_utils import json_to_dataframe
|
| 12 |
+
from app.services.processing.speech_service import SpeechService
|
| 13 |
+
from app.services.processing.emotion_analyzer import EmotionAnalyzer
|
| 14 |
+
from app.services.processing.ai_analysis import AIAnalysisService
|
| 15 |
+
|
| 16 |
+
# Configure logging
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
class VideoProcessor:
|
| 20 |
+
"""Service for processing videos."""
|
| 21 |
+
|
| 22 |
+
def __init__(self):
|
| 23 |
+
"""Initialize the video processor."""
|
| 24 |
+
self.speech_service = SpeechService()
|
| 25 |
+
self.emotion_analyzer = EmotionAnalyzer()
|
| 26 |
+
self.ai_analysis_service = AIAnalysisService()
|
| 27 |
+
|
| 28 |
+
@time_it
|
| 29 |
+
def process_video(
|
| 30 |
+
self,
|
| 31 |
+
video_path: str,
|
| 32 |
+
frame_rate: int = 1,
|
| 33 |
+
backend: str = 'mediapipe',
|
| 34 |
+
language: str = 'en',
|
| 35 |
+
generate_annotated_video: bool = False,
|
| 36 |
+
video_id: Optional[str] = None,
|
| 37 |
+
status_callback = None
|
| 38 |
+
) -> Tuple[str, str]:
|
| 39 |
+
"""
|
| 40 |
+
Process a video file for emotion analysis.
|
| 41 |
+
|
| 42 |
+
Args:
|
| 43 |
+
video_path: Path to the video file
|
| 44 |
+
frame_rate: Process every nth frame
|
| 45 |
+
backend: Backend to use for face detection
|
| 46 |
+
language: Language of the video
|
| 47 |
+
generate_annotated_video: Whether to generate an annotated video
|
| 48 |
+
video_id: ID of the video (optional)
|
| 49 |
+
status_callback: Callback function for progress updates
|
| 50 |
+
|
| 51 |
+
Returns:
|
| 52 |
+
Tuple of (transcript, analysis_json)
|
| 53 |
+
"""
|
| 54 |
+
start_time = time.time()
|
| 55 |
+
|
| 56 |
+
results_dir = settings.RESULTS_DIR
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Update status if callback provided
|
| 61 |
+
if status_callback:
|
| 62 |
+
status_callback(5) # 5% progress
|
| 63 |
+
|
| 64 |
+
logger.info(f"Processing video: {video_path}")
|
| 65 |
+
logger.info(f"Using backend: {backend}")
|
| 66 |
+
logger.info(f"Language: {language}")
|
| 67 |
+
|
| 68 |
+
# Force mediapipe backend for best GPU performance on Mac M3
|
| 69 |
+
if backend == 'opencv' or not backend or backend == "retinaface":
|
| 70 |
+
logger.info(f"Backend '{backend}' doesn't support GPU acceleration or is not recommended.")
|
| 71 |
+
logger.info(f"Switching to 'mediapipe' for GPU-accelerated frame analysis.")
|
| 72 |
+
backend = "mediapipe"
|
| 73 |
+
|
| 74 |
+
# Ensure we're using a GPU-compatible backend
|
| 75 |
+
if backend not in ['mediapipe', 'ssd', 'mtcnn']:
|
| 76 |
+
logger.info(f"Backend '{backend}' may not support GPU acceleration.")
|
| 77 |
+
logger.info(f"Consider using 'mediapipe' for best GPU performance on Mac M3.")
|
| 78 |
+
|
| 79 |
+
# Extract audio and transcribe
|
| 80 |
+
if status_callback:
|
| 81 |
+
status_callback(10) # 10% progress
|
| 82 |
+
|
| 83 |
+
logger.info("Starting speech-to-text processing...")
|
| 84 |
+
transcript = self.speech_service.process_video_speech(video_path, language)
|
| 85 |
+
logger.info(f"Speech-to-text completed. Text length: {len(transcript)} characters")
|
| 86 |
+
|
| 87 |
+
# Update status
|
| 88 |
+
if status_callback:
|
| 89 |
+
status_callback(40) # 40% progress
|
| 90 |
+
|
| 91 |
+
# Process video frames
|
| 92 |
+
logger.info(f"Starting frame analysis with {backend} backend...")
|
| 93 |
+
results, annotated_video_path = self.emotion_analyzer.process_video_frames(
|
| 94 |
+
video_path=video_path,
|
| 95 |
+
frame_rate=frame_rate,
|
| 96 |
+
backend=backend,
|
| 97 |
+
generate_annotated_video=generate_annotated_video,
|
| 98 |
+
status_callback=status_callback
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
# Check if we have any results
|
| 102 |
+
if not results:
|
| 103 |
+
logger.warning("No emotions detected in any frames.")
|
| 104 |
+
empty_results_json = json.dumps({'backend': []})
|
| 105 |
+
return transcript, empty_results_json
|
| 106 |
+
|
| 107 |
+
# Convert results to DataFrame
|
| 108 |
+
df = json_to_dataframe({'backend': results})
|
| 109 |
+
|
| 110 |
+
# Update status
|
| 111 |
+
if status_callback:
|
| 112 |
+
status_callback(80) # 80% progress
|
| 113 |
+
|
| 114 |
+
# Check if we have emotion data
|
| 115 |
+
if df.empty:
|
| 116 |
+
logger.warning("No emotions detected, cannot generate analysis.")
|
| 117 |
+
empty_results_json = json.dumps({'backend': results})
|
| 118 |
+
return transcript, empty_results_json
|
| 119 |
+
|
| 120 |
+
# Perform AI analysis
|
| 121 |
+
logger.info("Starting AI analysis...")
|
| 122 |
+
analysis = self.ai_analysis_service.analyze_emotions_and_transcript(df, transcript, language, InterviewAnalyzer)
|
| 123 |
+
|
| 124 |
+
# Update status
|
| 125 |
+
if status_callback:
|
| 126 |
+
status_callback(100) # 100% progress
|
| 127 |
+
|
| 128 |
+
# Log total processing time
|
| 129 |
+
end_time = time.time()
|
| 130 |
+
total_time_taken = end_time - start_time
|
| 131 |
+
logger.info(f"Total processing time: {total_time_taken:.2f} seconds")
|
| 132 |
+
|
| 133 |
+
# Convert analysis to JSON
|
| 134 |
+
analysis_json = json.dumps(analysis)
|
| 135 |
+
|
| 136 |
+
return transcript, analysis_json
|
| 137 |
+
|
| 138 |
+
# Create a singleton instance
|
| 139 |
+
video_processor = VideoProcessor()
|
| 140 |
+
|
| 141 |
+
# Function to maintain backward compatibility
|
| 142 |
+
def process_video(
|
| 143 |
+
video_path: str,
|
| 144 |
+
frame_rate: int = 1,
|
| 145 |
+
backend: str = 'mediapipe',
|
| 146 |
+
language: str = 'en',
|
| 147 |
+
generate_annotated_video: bool = False,
|
| 148 |
+
video_id: Optional[str] = None,
|
| 149 |
+
status_callback = None
|
| 150 |
+
) -> Tuple[str, str]:
|
| 151 |
+
"""
|
| 152 |
+
Process a video file for emotion analysis (backward compatibility function).
|
| 153 |
+
|
| 154 |
+
Args:
|
| 155 |
+
video_path: Path to the video file
|
| 156 |
+
frame_rate: Process every nth frame
|
| 157 |
+
backend: Backend to use for face detection
|
| 158 |
+
language: Language of the video
|
| 159 |
+
generate_annotated_video: Whether to generate an annotated video
|
| 160 |
+
video_id: ID of the video (optional)
|
| 161 |
+
status_callback: Callback function for progress updates
|
| 162 |
+
|
| 163 |
+
Returns:
|
| 164 |
+
Tuple of (transcript, analysis_json)
|
| 165 |
+
"""
|
| 166 |
+
return video_processor.process_video(
|
| 167 |
+
video_path=video_path,
|
| 168 |
+
frame_rate=frame_rate,
|
| 169 |
+
backend=backend,
|
| 170 |
+
language=language,
|
| 171 |
+
generate_annotated_video=generate_annotated_video,
|
| 172 |
+
video_id=video_id,
|
| 173 |
+
status_callback=status_callback
|
| 174 |
+
)
|
behavior_backend/app/services/processing/video_processor.py
ADDED
|
@@ -0,0 +1,644 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import time
|
| 3 |
+
import json
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import cv2
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, Any, Optional, Tuple
|
| 8 |
+
import os
|
| 9 |
+
import concurrent.futures
|
| 10 |
+
|
| 11 |
+
from app.utils.logging_utils import time_it, setup_logger
|
| 12 |
+
from app.utils.data_utils import json_to_dataframe
|
| 13 |
+
from app.core.config import settings
|
| 14 |
+
from app.services.processing.speech_service import SpeechService
|
| 15 |
+
from app.services.processing.emotion_analyzer import EmotionAnalyzer
|
| 16 |
+
from app.services.processing.ai_analysis import AIAnalysisService
|
| 17 |
+
from app.services.processing.eye_contact_analyzer import analyze_video_file as analyze_eye_contact_video
|
| 18 |
+
from app.services.processing.body_language_analyzer import analyze_video_file as analyze_body_language_video
|
| 19 |
+
from app.services.processing.ai_face_analyzer import AIFaceAnalyzer
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Configure logging
|
| 23 |
+
logger = setup_logger(__name__)
|
| 24 |
+
|
| 25 |
+
class VideoProcessor:
|
| 26 |
+
"""Service for processing videos."""
|
| 27 |
+
|
| 28 |
+
def __init__(self):
|
| 29 |
+
"""Initialize the video processor."""
|
| 30 |
+
self.speech_service = SpeechService()
|
| 31 |
+
self.emotion_analyzer = EmotionAnalyzer()
|
| 32 |
+
self.ai_analysis_service = AIAnalysisService()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@time_it
|
| 36 |
+
def process_video(
|
| 37 |
+
self,
|
| 38 |
+
video_path: str,
|
| 39 |
+
frame_rate: int = 1,
|
| 40 |
+
backend: str = 'mediapipe',
|
| 41 |
+
language: str = 'en',
|
| 42 |
+
generate_annotated_video: bool = False,
|
| 43 |
+
video_id: Optional[str] = None,
|
| 44 |
+
status_callback = None,
|
| 45 |
+
min_face_confidence: float = 0.5,
|
| 46 |
+
min_face_size_ratio: float = 0.05,
|
| 47 |
+
save_emotion_stats: bool = True,
|
| 48 |
+
skip_frames: int = 2, # Default parameter, not used for frame sampling anymore
|
| 49 |
+
adaptive_sampling: bool = False, # Disable adaptive sampling to match test behavior
|
| 50 |
+
analyze_eye_contact: bool = True,
|
| 51 |
+
analyze_body_language: bool = True,
|
| 52 |
+
analyze_face: bool = True,
|
| 53 |
+
job_title: str = "Professional",
|
| 54 |
+
model_name: str = "gpt-4o"
|
| 55 |
+
) -> Tuple[str, str]:
|
| 56 |
+
"""
|
| 57 |
+
Process a video file for emotion analysis.
|
| 58 |
+
|
| 59 |
+
Args:
|
| 60 |
+
video_path: Path to the video file
|
| 61 |
+
frame_rate: Process every nth frame (controls the sampling rate of frames for analysis)
|
| 62 |
+
backend: Backend to use for face detection
|
| 63 |
+
language: Language of the video
|
| 64 |
+
generate_annotated_video: Whether to generate an annotated video
|
| 65 |
+
video_id: ID of the video (optional)
|
| 66 |
+
status_callback: Callback function for progress updates
|
| 67 |
+
min_face_confidence: Minimum confidence for face detection
|
| 68 |
+
min_face_size_ratio: Minimum face size as ratio of image dimensions
|
| 69 |
+
save_emotion_stats: Whether to save detailed emotion statistics as JSON
|
| 70 |
+
skip_frames: Legacy parameter, kept for backward compatibility but not used
|
| 71 |
+
adaptive_sampling: Whether to use adaptive sampling
|
| 72 |
+
analyze_eye_contact: Whether to analyze eye contact
|
| 73 |
+
analyze_body_language: Whether to analyze body language
|
| 74 |
+
analyze_face: Whether to analyze face
|
| 75 |
+
job_title: Job title for face analysis
|
| 76 |
+
|
| 77 |
+
Returns:
|
| 78 |
+
Tuple of (transcript, analysis_json)
|
| 79 |
+
"""
|
| 80 |
+
start_time = time.time()
|
| 81 |
+
|
| 82 |
+
# Add debug info about the video path
|
| 83 |
+
logger.info(f"DEBUG - Processing video path: {video_path}")
|
| 84 |
+
logger.info(f"DEBUG - Video file exists: {os.path.exists(video_path)}")
|
| 85 |
+
if os.path.exists(video_path):
|
| 86 |
+
logger.info(f"DEBUG - Video file size: {os.path.getsize(video_path) / (1024*1024):.2f} MB")
|
| 87 |
+
|
| 88 |
+
# Create results directory if it doesn't exist
|
| 89 |
+
results_dir = settings.RESULTS_DIR
|
| 90 |
+
os.makedirs(results_dir, exist_ok=True)
|
| 91 |
+
|
| 92 |
+
# Update status if callback provided
|
| 93 |
+
if status_callback:
|
| 94 |
+
status_callback(5) # 5% progress
|
| 95 |
+
|
| 96 |
+
logger.info(f"Processing video: {video_path}")
|
| 97 |
+
logger.info(f"Using backend: {backend}")
|
| 98 |
+
logger.info(f"Language: {language}")
|
| 99 |
+
|
| 100 |
+
# Force mediapipe backend for best GPU performance on Mac M3
|
| 101 |
+
if backend == 'opencv' or not backend or backend == "retinaface":
|
| 102 |
+
logger.info(f"Backend '{backend}' doesn't support GPU acceleration or is not recommended.")
|
| 103 |
+
logger.info(f"Switching to 'mediapipe' for GPU-accelerated frame analysis.")
|
| 104 |
+
backend = "mediapipe"
|
| 105 |
+
|
| 106 |
+
# Ensure we're using a GPU-compatible backend
|
| 107 |
+
if backend not in ['mediapipe', 'ssd', 'mtcnn']:
|
| 108 |
+
logger.info(f"Backend '{backend}' may not be optimized for GPU acceleration.")
|
| 109 |
+
logger.info(f"Consider using 'mediapipe' for best GPU performance.")
|
| 110 |
+
|
| 111 |
+
# Define worker functions for parallel processing
|
| 112 |
+
def process_speech(video_path, language):
|
| 113 |
+
logger.info("Starting speech-to-text processing...")
|
| 114 |
+
try:
|
| 115 |
+
service = 'groq'
|
| 116 |
+
transcript = self.speech_service.process_video_speech(video_path, language, service)
|
| 117 |
+
logger.info(f"Speech-to-text completed. Text length: {len(transcript)} characters")
|
| 118 |
+
return transcript
|
| 119 |
+
except Exception as e:
|
| 120 |
+
logger.error(f"Error during speech-to-text processing: {str(e)}")
|
| 121 |
+
logger.warning("Continuing with empty transcript due to speech processing failure")
|
| 122 |
+
return ""
|
| 123 |
+
|
| 124 |
+
def process_eye_contact(video_path, model_name):
|
| 125 |
+
logger.info("Starting eye contact analysis...")
|
| 126 |
+
try:
|
| 127 |
+
results = analyze_eye_contact_video(
|
| 128 |
+
video_path=video_path,
|
| 129 |
+
display_video=False,
|
| 130 |
+
save_results=False,
|
| 131 |
+
model_name=model_name
|
| 132 |
+
)
|
| 133 |
+
logger.info("Eye contact analysis completed successfully")
|
| 134 |
+
return results
|
| 135 |
+
except Exception as e:
|
| 136 |
+
logger.error(f"Error during eye contact analysis: {str(e)}")
|
| 137 |
+
logger.warning("Continuing without eye contact analysis")
|
| 138 |
+
return None
|
| 139 |
+
|
| 140 |
+
def process_body_language(video_path):
|
| 141 |
+
logger.info("Starting body language analysis...")
|
| 142 |
+
try:
|
| 143 |
+
results = analyze_body_language_video(
|
| 144 |
+
video_path=video_path,
|
| 145 |
+
display_video=False,
|
| 146 |
+
save_results=False
|
| 147 |
+
)
|
| 148 |
+
logger.info("Body language analysis completed successfully")
|
| 149 |
+
return results
|
| 150 |
+
except Exception as e:
|
| 151 |
+
logger.error(f"Error during body language analysis: {str(e)}")
|
| 152 |
+
logger.warning("Continuing without body language analysis")
|
| 153 |
+
return None
|
| 154 |
+
|
| 155 |
+
def process_face_analysis(video_path, job_title):
|
| 156 |
+
logger.info("Starting face analysis...")
|
| 157 |
+
try:
|
| 158 |
+
# Create a temp directory for extracted frames
|
| 159 |
+
temp_frames_dir = Path("temp_face_frames")
|
| 160 |
+
os.makedirs(temp_frames_dir, exist_ok=True)
|
| 161 |
+
|
| 162 |
+
face_frames = []
|
| 163 |
+
# Extract frames from the video
|
| 164 |
+
cap = cv2.VideoCapture(video_path)
|
| 165 |
+
if not cap.isOpened():
|
| 166 |
+
logger.error(f"Error: Could not open video file {video_path}")
|
| 167 |
+
return None
|
| 168 |
+
|
| 169 |
+
# Get video properties
|
| 170 |
+
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 171 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 172 |
+
|
| 173 |
+
# Extract 3 evenly distributed frames
|
| 174 |
+
num_frames = 3
|
| 175 |
+
frame_indices = [int(i * frame_count / (num_frames + 1)) for i in range(1, num_frames + 1)]
|
| 176 |
+
|
| 177 |
+
for i, frame_idx in enumerate(frame_indices):
|
| 178 |
+
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
|
| 179 |
+
ret, frame = cap.read()
|
| 180 |
+
if ret:
|
| 181 |
+
# Generate filename
|
| 182 |
+
timestamp = frame_idx / fps if fps > 0 else 0
|
| 183 |
+
minutes = int(timestamp // 60)
|
| 184 |
+
seconds = int(timestamp % 60)
|
| 185 |
+
filename = f"frame_{i+1}_at_{minutes:02d}m{seconds:02d}s.jpg"
|
| 186 |
+
output_path = temp_frames_dir / filename
|
| 187 |
+
|
| 188 |
+
# Save frame
|
| 189 |
+
cv2.imwrite(str(output_path), frame)
|
| 190 |
+
face_frames.append(str(output_path))
|
| 191 |
+
|
| 192 |
+
cap.release()
|
| 193 |
+
|
| 194 |
+
if face_frames:
|
| 195 |
+
# Analyze extracted frames
|
| 196 |
+
face_analyzer = AIFaceAnalyzer(provider="openai")
|
| 197 |
+
face_analysis_results = face_analyzer.analyze_profile_pictures(face_frames, job_title)
|
| 198 |
+
logger.info("Face analysis completed successfully")
|
| 199 |
+
return face_analysis_results
|
| 200 |
+
else:
|
| 201 |
+
logger.warning("No frames were extracted for face analysis")
|
| 202 |
+
return None
|
| 203 |
+
except Exception as e:
|
| 204 |
+
logger.error(f"Error during face analysis: {str(e)}")
|
| 205 |
+
logger.warning("Continuing without face analysis")
|
| 206 |
+
return None
|
| 207 |
+
|
| 208 |
+
def process_emotion_analysis(video_path, frame_rate, backend, generate_annotated_video, status_callback=None):
|
| 209 |
+
logger.info(f"Starting emotion analysis with {backend} backend...")
|
| 210 |
+
try:
|
| 211 |
+
# Initialize emotion analyzer with custom parameters
|
| 212 |
+
custom_emotion_analyzer = EmotionAnalyzer(
|
| 213 |
+
min_face_size_ratio=min_face_size_ratio,
|
| 214 |
+
min_confidence=min_face_confidence,
|
| 215 |
+
skip_similar_frames=False # Explicitly disable frame similarity checks
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
+
# Use process_video_frames from EmotionAnalyzer
|
| 219 |
+
all_results, annotated_video_path, timing_summary, metadata = custom_emotion_analyzer.process_video_frames(
|
| 220 |
+
video_path=video_path,
|
| 221 |
+
frame_rate=frame_rate,
|
| 222 |
+
backend=backend,
|
| 223 |
+
generate_annotated_video=generate_annotated_video,
|
| 224 |
+
status_callback=status_callback, # Pass the received status_callback
|
| 225 |
+
adaptive_sampling=adaptive_sampling,
|
| 226 |
+
max_frames=1000
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
# Log timing summary and metadata for monitoring
|
| 230 |
+
logger.info(f"Frame analysis timing summary: {timing_summary}")
|
| 231 |
+
logger.info(f"Frame analysis metadata: {metadata}")
|
| 232 |
+
logger.info(f"Total frames analyzed: {len(all_results)}")
|
| 233 |
+
|
| 234 |
+
return all_results, annotated_video_path, timing_summary, metadata
|
| 235 |
+
except Exception as e:
|
| 236 |
+
logger.error(f"Error during emotion analysis: {str(e)}")
|
| 237 |
+
return [], None, {}, {}
|
| 238 |
+
|
| 239 |
+
# Execute tasks in parallel using ThreadPoolExecutor
|
| 240 |
+
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
| 241 |
+
# Start all tasks in parallel
|
| 242 |
+
future_transcript = executor.submit(process_speech, video_path, language)
|
| 243 |
+
|
| 244 |
+
futures = {}
|
| 245 |
+
if analyze_eye_contact:
|
| 246 |
+
futures['eye_contact'] = executor.submit(process_eye_contact, video_path, model_name)
|
| 247 |
+
|
| 248 |
+
if analyze_body_language:
|
| 249 |
+
futures['body_language'] = executor.submit(process_body_language, video_path)
|
| 250 |
+
|
| 251 |
+
if analyze_face:
|
| 252 |
+
futures['face'] = executor.submit(process_face_analysis, video_path, job_title)
|
| 253 |
+
|
| 254 |
+
# Always submit emotion analysis
|
| 255 |
+
futures['emotion'] = executor.submit(process_emotion_analysis, video_path, frame_rate, backend, generate_annotated_video, status_callback)
|
| 256 |
+
|
| 257 |
+
# Wait for all tasks to complete and collect results
|
| 258 |
+
transcript = future_transcript.result()
|
| 259 |
+
|
| 260 |
+
eye_contact_results = futures['eye_contact'].result() if 'eye_contact' in futures else None
|
| 261 |
+
body_language_results = futures['body_language'].result() if 'body_language' in futures else None
|
| 262 |
+
face_analysis_results = futures['face'].result() if 'face' in futures else None
|
| 263 |
+
|
| 264 |
+
all_results, annotated_video_path, timing_summary, metadata = futures['emotion'].result()
|
| 265 |
+
|
| 266 |
+
# Update status after parallel processing
|
| 267 |
+
if status_callback:
|
| 268 |
+
status_callback(80) # 80% progress
|
| 269 |
+
|
| 270 |
+
print("********Body language results**************" )
|
| 271 |
+
print(body_language_results)
|
| 272 |
+
print("********Eye contact results**************" )
|
| 273 |
+
print(eye_contact_results)
|
| 274 |
+
print("********End of results**************" )
|
| 275 |
+
|
| 276 |
+
# Check if we have any emotion results
|
| 277 |
+
if not all_results:
|
| 278 |
+
logger.warning("No emotions detected in any frames.")
|
| 279 |
+
empty_results = {
|
| 280 |
+
'backend': [],
|
| 281 |
+
'eye_contact_analysis': eye_contact_results if eye_contact_results else {},
|
| 282 |
+
'body_language_analysis': body_language_results if body_language_results else {},
|
| 283 |
+
'face_analysis': face_analysis_results if face_analysis_results else {}
|
| 284 |
+
}
|
| 285 |
+
empty_results_json = json.dumps(empty_results)
|
| 286 |
+
return transcript, empty_results_json
|
| 287 |
+
|
| 288 |
+
# Calculate emotion statistics
|
| 289 |
+
emotion_stats = self._calculate_emotion_statistics(all_results)
|
| 290 |
+
|
| 291 |
+
# Video info data
|
| 292 |
+
cap = cv2.VideoCapture(video_path)
|
| 293 |
+
video_fps = cap.get(cv2.CAP_PROP_FPS)
|
| 294 |
+
video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 295 |
+
duration = video_frames / video_fps if video_fps > 0 else 0
|
| 296 |
+
cap.release()
|
| 297 |
+
|
| 298 |
+
# Create comprehensive results structure
|
| 299 |
+
comprehensive_results = {
|
| 300 |
+
"video_info": {
|
| 301 |
+
"path": video_path,
|
| 302 |
+
"frames": video_frames,
|
| 303 |
+
"fps": video_fps,
|
| 304 |
+
"duration_seconds": duration,
|
| 305 |
+
"device_used": metadata.get("device", "unknown"),
|
| 306 |
+
"backend": backend,
|
| 307 |
+
"face_detection_params": {
|
| 308 |
+
"min_confidence": min_face_confidence,
|
| 309 |
+
"min_face_size_ratio": min_face_size_ratio
|
| 310 |
+
}
|
| 311 |
+
},
|
| 312 |
+
"emotion_stats": emotion_stats,
|
| 313 |
+
"frames_analyzed": len(all_results),
|
| 314 |
+
"execution_stats": {
|
| 315 |
+
"total_processing_time_seconds": timing_summary.get("total_time", 0),
|
| 316 |
+
"avg_processing_time_seconds": timing_summary.get("avg_time_per_frame", 0),
|
| 317 |
+
"timing_breakdown": {
|
| 318 |
+
"face_detection": metadata.get("detailed_timing", {}).get("face_detection", 0),
|
| 319 |
+
"emotion_analysis": metadata.get("detailed_timing", {}).get("emotion_analysis", 0),
|
| 320 |
+
"temporal_consistency": metadata.get("detailed_timing", {}).get("temporal_consistency", 0),
|
| 321 |
+
"cache_check": metadata.get("detailed_timing", {}).get("cache_check", 0),
|
| 322 |
+
"similarity_check": metadata.get("detailed_timing", {}).get("similarity_check", 0),
|
| 323 |
+
"total": timing_summary.get("avg_time_per_frame", 0)
|
| 324 |
+
}
|
| 325 |
+
}
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
# Add eye contact, body language, and face analysis results if available
|
| 329 |
+
if eye_contact_results:
|
| 330 |
+
comprehensive_results["eye_contact_analysis"] = eye_contact_results
|
| 331 |
+
|
| 332 |
+
if body_language_results:
|
| 333 |
+
comprehensive_results["body_language_analysis"] = body_language_results
|
| 334 |
+
|
| 335 |
+
if face_analysis_results:
|
| 336 |
+
comprehensive_results["face_analysis"] = face_analysis_results
|
| 337 |
+
|
| 338 |
+
# Determine overall sentiment based on emotion_percentages
|
| 339 |
+
dominant_emotion, _ = max(emotion_stats["emotion_percentages"].items(), key=lambda x: x[1], default=("neutral", 0))
|
| 340 |
+
comprehensive_results["overall_sentiment"] = dominant_emotion.capitalize()
|
| 341 |
+
|
| 342 |
+
# Print the JSON results to console for immediate feedback
|
| 343 |
+
print("\n--- Comprehensive Analysis JSON Results ---")
|
| 344 |
+
print(json.dumps(comprehensive_results, indent=2))
|
| 345 |
+
print("--------------------------------------\n")
|
| 346 |
+
|
| 347 |
+
# Process the results to ensure they have the required fields
|
| 348 |
+
processed_results = self._process_emotion_results(all_results)
|
| 349 |
+
|
| 350 |
+
# Convert results to DataFrame
|
| 351 |
+
df = json_to_dataframe({'backend': processed_results})
|
| 352 |
+
|
| 353 |
+
# Store original emotion data from emotion_stats on the DataFrame
|
| 354 |
+
if emotion_stats["emotion_percentages"]:
|
| 355 |
+
# Use the emotion_percentages data for all rows
|
| 356 |
+
df['raw_emotion_data'] = [emotion_stats["emotion_percentages"]] * len(df)
|
| 357 |
+
|
| 358 |
+
# Add confidence data as a separate field
|
| 359 |
+
confidence_data = {
|
| 360 |
+
"confidence_by_emotion": emotion_stats["confidence_by_emotion"],
|
| 361 |
+
"average_confidence": emotion_stats["average_confidence"]
|
| 362 |
+
}
|
| 363 |
+
df['confidence_data'] = [confidence_data] * len(df)
|
| 364 |
+
|
| 365 |
+
# Add overall sentiment to each row
|
| 366 |
+
df['overall_sentiment'] = comprehensive_results["overall_sentiment"]
|
| 367 |
+
|
| 368 |
+
logger.info(f"Added emotion percentages data to DataFrame: {emotion_stats['emotion_percentages']}")
|
| 369 |
+
logger.info(f"Added confidence data to DataFrame: {confidence_data}")
|
| 370 |
+
logger.info(f"Added overall sentiment to DataFrame: {comprehensive_results['overall_sentiment']}")
|
| 371 |
+
else:
|
| 372 |
+
logger.warning("No emotion data found to add to DataFrame")
|
| 373 |
+
|
| 374 |
+
# Check if we have emotion data
|
| 375 |
+
if df.empty:
|
| 376 |
+
logger.warning("No emotions detected, cannot generate analysis.")
|
| 377 |
+
# Use the already processed results if available, or create empty list if not
|
| 378 |
+
if 'processed_results' not in locals():
|
| 379 |
+
processed_results = []
|
| 380 |
+
empty_results = {
|
| 381 |
+
'backend': processed_results,
|
| 382 |
+
'eye_contact_analysis': eye_contact_results if eye_contact_results else {},
|
| 383 |
+
'body_language_analysis': body_language_results if body_language_results else {},
|
| 384 |
+
'face_analysis': face_analysis_results if face_analysis_results else {}
|
| 385 |
+
}
|
| 386 |
+
empty_results_json = json.dumps(empty_results)
|
| 387 |
+
return transcript, empty_results_json
|
| 388 |
+
|
| 389 |
+
# Perform AI analysis
|
| 390 |
+
logger.info("Starting AI analysis...")
|
| 391 |
+
try:
|
| 392 |
+
# Log the data being passed to the AI analysis
|
| 393 |
+
if eye_contact_results:
|
| 394 |
+
logger.info(f"Passing eye_contact_data to AI analysis with {len(str(eye_contact_results))} characters")
|
| 395 |
+
else:
|
| 396 |
+
logger.info("No eye_contact_data available to pass to AI analysis")
|
| 397 |
+
|
| 398 |
+
if body_language_results:
|
| 399 |
+
logger.info(f"Passing body_language_data to AI analysis with {len(str(body_language_results))} characters")
|
| 400 |
+
else:
|
| 401 |
+
logger.info("No body_language_data available to pass to AI analysis")
|
| 402 |
+
|
| 403 |
+
if face_analysis_results:
|
| 404 |
+
logger.info(f"Passing face_analysis_data to AI analysis with {len(str(face_analysis_results))} items")
|
| 405 |
+
else:
|
| 406 |
+
logger.info("No face_analysis_data available to pass to AI analysis")
|
| 407 |
+
|
| 408 |
+
analysis = self.ai_analysis_service.analyze_emotions_and_transcript(
|
| 409 |
+
df,
|
| 410 |
+
transcript,
|
| 411 |
+
language,
|
| 412 |
+
eye_contact_data=eye_contact_results,
|
| 413 |
+
body_language_data=body_language_results,
|
| 414 |
+
face_analysis_data=face_analysis_results,
|
| 415 |
+
model_name=model_name
|
| 416 |
+
)
|
| 417 |
+
except Exception as e:
|
| 418 |
+
logger.error(f"Error during AI analysis: {str(e)}")
|
| 419 |
+
results_with_error = {
|
| 420 |
+
'backend': processed_results,
|
| 421 |
+
'error': str(e),
|
| 422 |
+
'eye_contact_analysis': eye_contact_results if eye_contact_results else {},
|
| 423 |
+
'body_language_analysis': body_language_results if body_language_results else {},
|
| 424 |
+
'face_analysis': face_analysis_results if face_analysis_results else {}
|
| 425 |
+
}
|
| 426 |
+
results_json = json.dumps(results_with_error)
|
| 427 |
+
return transcript, results_json
|
| 428 |
+
|
| 429 |
+
# Update status
|
| 430 |
+
if status_callback:
|
| 431 |
+
status_callback(100) # 100% progress
|
| 432 |
+
|
| 433 |
+
# Log total processing time
|
| 434 |
+
end_time = time.time()
|
| 435 |
+
total_time_taken = end_time - start_time
|
| 436 |
+
logger.info(f"Total processing time: {total_time_taken:.2f} seconds")
|
| 437 |
+
|
| 438 |
+
# Convert analysis to JSON
|
| 439 |
+
analysis_json = json.dumps(analysis)
|
| 440 |
+
|
| 441 |
+
return transcript, analysis_json
|
| 442 |
+
|
| 443 |
+
def _calculate_emotion_statistics(self, all_results):
|
| 444 |
+
"""Calculate comprehensive emotion statistics from frame results."""
|
| 445 |
+
# Count frames with faces
|
| 446 |
+
frames_with_faces = 0
|
| 447 |
+
total_faces = 0
|
| 448 |
+
total_confidence = 0
|
| 449 |
+
|
| 450 |
+
emotion_counts = {
|
| 451 |
+
"angry": 0,
|
| 452 |
+
"disgust": 0,
|
| 453 |
+
"fear": 0,
|
| 454 |
+
"happy": 0,
|
| 455 |
+
"sad": 0,
|
| 456 |
+
"surprise": 0,
|
| 457 |
+
"neutral": 0
|
| 458 |
+
}
|
| 459 |
+
|
| 460 |
+
confidence_by_emotion = {emotion: [] for emotion in emotion_counts.keys()}
|
| 461 |
+
|
| 462 |
+
# Process each frame result
|
| 463 |
+
for result in all_results:
|
| 464 |
+
faces = result.get("faces", [])
|
| 465 |
+
if faces:
|
| 466 |
+
frames_with_faces += 1
|
| 467 |
+
total_faces += len(faces)
|
| 468 |
+
|
| 469 |
+
# Count main emotion if available
|
| 470 |
+
if "main_emotion" in result:
|
| 471 |
+
main_emotion = result["main_emotion"]["emotion"]
|
| 472 |
+
confidence = result["main_emotion"]["confidence"]
|
| 473 |
+
|
| 474 |
+
if main_emotion in emotion_counts:
|
| 475 |
+
emotion_counts[main_emotion] += 1
|
| 476 |
+
confidence_by_emotion[main_emotion].append(confidence)
|
| 477 |
+
total_confidence += confidence
|
| 478 |
+
# Otherwise check each face for emotions
|
| 479 |
+
else:
|
| 480 |
+
for face in faces:
|
| 481 |
+
if "emotion" in face:
|
| 482 |
+
# Find dominant emotion for this face
|
| 483 |
+
dominant_emotion = max(face["emotion"].items(), key=lambda x: x[1])
|
| 484 |
+
emotion_name = dominant_emotion[0]
|
| 485 |
+
confidence = dominant_emotion[1]
|
| 486 |
+
|
| 487 |
+
if emotion_name in emotion_counts:
|
| 488 |
+
emotion_counts[emotion_name] += 1
|
| 489 |
+
confidence_by_emotion[emotion_name].append(confidence)
|
| 490 |
+
total_confidence += confidence
|
| 491 |
+
|
| 492 |
+
# Calculate percentages
|
| 493 |
+
total_emotions = sum(emotion_counts.values())
|
| 494 |
+
emotion_percentages = {}
|
| 495 |
+
if total_emotions > 0:
|
| 496 |
+
for emotion, count in emotion_counts.items():
|
| 497 |
+
emotion_percentages[emotion] = (count / total_emotions) * 100
|
| 498 |
+
|
| 499 |
+
# Calculate face detection percentage
|
| 500 |
+
face_detection_percentage = 0
|
| 501 |
+
if all_results:
|
| 502 |
+
face_detection_percentage = (frames_with_faces / len(all_results)) * 100
|
| 503 |
+
|
| 504 |
+
# Calculate average confidence
|
| 505 |
+
average_confidence = 0
|
| 506 |
+
if total_emotions > 0:
|
| 507 |
+
average_confidence = total_confidence / total_emotions
|
| 508 |
+
|
| 509 |
+
# Calculate average confidence by emotion
|
| 510 |
+
confidence_averages = {}
|
| 511 |
+
for emotion, confidences in confidence_by_emotion.items():
|
| 512 |
+
if confidences:
|
| 513 |
+
confidence_averages[emotion] = sum(confidences) / len(confidences)
|
| 514 |
+
else:
|
| 515 |
+
confidence_averages[emotion] = 0
|
| 516 |
+
|
| 517 |
+
# Create emotion statistics
|
| 518 |
+
emotion_stats = {
|
| 519 |
+
"frames_with_faces": frames_with_faces,
|
| 520 |
+
"face_detection_percentage": face_detection_percentage,
|
| 521 |
+
"emotion_counts": emotion_counts,
|
| 522 |
+
"emotion_percentages": emotion_percentages,
|
| 523 |
+
"average_confidence": average_confidence,
|
| 524 |
+
"confidence_by_emotion": confidence_averages
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
return emotion_stats
|
| 528 |
+
|
| 529 |
+
def _process_emotion_results(self, all_results):
|
| 530 |
+
"""Process emotion results to ensure they have required fields."""
|
| 531 |
+
processed_results = []
|
| 532 |
+
|
| 533 |
+
# Process all results
|
| 534 |
+
for result in all_results:
|
| 535 |
+
# Skip empty results
|
| 536 |
+
if not result:
|
| 537 |
+
continue
|
| 538 |
+
|
| 539 |
+
# Process faces to ensure they have dominant_emotion and emotion_confidence
|
| 540 |
+
if 'faces' in result and result['faces']:
|
| 541 |
+
for face in result['faces']:
|
| 542 |
+
# If face has emotion data but no dominant_emotion, calculate it
|
| 543 |
+
if 'emotion' in face and 'dominant_emotion' not in face:
|
| 544 |
+
emotions = face['emotion']
|
| 545 |
+
if emotions:
|
| 546 |
+
# Find dominant emotion and its confidence
|
| 547 |
+
dominant_emotion, confidence = max(emotions.items(), key=lambda x: x[1])
|
| 548 |
+
face['dominant_emotion'] = dominant_emotion
|
| 549 |
+
face['emotion_confidence'] = confidence
|
| 550 |
+
face['emotion_stable'] = face.get('emotion_stable', False)
|
| 551 |
+
|
| 552 |
+
# Process main_face if it exists
|
| 553 |
+
if 'main_face' in result and result['main_face']:
|
| 554 |
+
main_face = result['main_face']
|
| 555 |
+
if 'emotion' in main_face and 'dominant_emotion' not in main_face:
|
| 556 |
+
emotions = main_face['emotion']
|
| 557 |
+
if emotions:
|
| 558 |
+
# Find dominant emotion and its confidence
|
| 559 |
+
dominant_emotion, confidence = max(emotions.items(), key=lambda x: x[1])
|
| 560 |
+
main_face['dominant_emotion'] = dominant_emotion
|
| 561 |
+
main_face['emotion_confidence'] = confidence
|
| 562 |
+
main_face['emotion_stable'] = main_face.get('emotion_stable', False)
|
| 563 |
+
|
| 564 |
+
# Process main_emotion if it exists
|
| 565 |
+
if 'main_emotion' in result and result['main_emotion']:
|
| 566 |
+
main_emotion = result['main_emotion']
|
| 567 |
+
# If main_emotion has emotion but not confidence, add it
|
| 568 |
+
if 'emotion' in main_emotion and 'confidence' not in main_emotion:
|
| 569 |
+
# Try to get confidence from main_face
|
| 570 |
+
if 'main_face' in result and result['main_face'] and 'emotion' in result['main_face']:
|
| 571 |
+
emotion_name = main_emotion['emotion']
|
| 572 |
+
main_emotion['confidence'] = result['main_face']['emotion'].get(emotion_name, 0)
|
| 573 |
+
|
| 574 |
+
processed_results.append(result)
|
| 575 |
+
|
| 576 |
+
return processed_results
|
| 577 |
+
|
| 578 |
+
# Create a singleton instance
|
| 579 |
+
video_processor = VideoProcessor()
|
| 580 |
+
|
| 581 |
+
# Function to maintain backward compatibility
|
| 582 |
+
def process_video(
|
| 583 |
+
video_path: str,
|
| 584 |
+
frame_rate: int = 1,
|
| 585 |
+
backend: str = 'mediapipe',
|
| 586 |
+
language: str = 'en',
|
| 587 |
+
generate_annotated_video: bool = False,
|
| 588 |
+
video_id: Optional[str] = None,
|
| 589 |
+
status_callback = None,
|
| 590 |
+
min_face_confidence: float = 0.5,
|
| 591 |
+
min_face_size_ratio: float = 0.05,
|
| 592 |
+
save_emotion_stats: bool = True,
|
| 593 |
+
skip_frames: int = 2, # Default parameter, not used for frame sampling anymore
|
| 594 |
+
adaptive_sampling: bool = False, # Control whether adaptive sampling is used
|
| 595 |
+
analyze_eye_contact: bool = True,
|
| 596 |
+
analyze_body_language: bool = True,
|
| 597 |
+
analyze_face: bool = True,
|
| 598 |
+
job_title: str = "Professional",
|
| 599 |
+
model_name: str = "gpt-4o"
|
| 600 |
+
) -> Tuple[str, str]:
|
| 601 |
+
"""
|
| 602 |
+
Process a video file for emotion analysis (backward compatibility function).
|
| 603 |
+
|
| 604 |
+
Args:
|
| 605 |
+
video_path: Path to the video file
|
| 606 |
+
frame_rate: Process every nth frame (controls the sampling rate of frames for analysis)
|
| 607 |
+
backend: Backend to use for face detection
|
| 608 |
+
language: Language of the video
|
| 609 |
+
generate_annotated_video: Whether to generate an annotated video
|
| 610 |
+
video_id: ID of the video (optional)
|
| 611 |
+
status_callback: Callback function for progress updates
|
| 612 |
+
min_face_confidence: Minimum confidence for face detection
|
| 613 |
+
min_face_size_ratio: Minimum face size as ratio of image dimensions
|
| 614 |
+
save_emotion_stats: Whether to save detailed emotion statistics as JSON
|
| 615 |
+
skip_frames: Legacy parameter, kept for backward compatibility but not used
|
| 616 |
+
adaptive_sampling: Whether to use adaptive sampling
|
| 617 |
+
analyze_eye_contact: Whether to analyze eye contact
|
| 618 |
+
analyze_body_language: Whether to analyze body language
|
| 619 |
+
analyze_face: Whether to analyze face
|
| 620 |
+
job_title: Job title for face analysis
|
| 621 |
+
model_name: The name of the model to use for AI analysis
|
| 622 |
+
|
| 623 |
+
Returns:
|
| 624 |
+
Tuple of (transcript, analysis_json)
|
| 625 |
+
"""
|
| 626 |
+
return video_processor.process_video(
|
| 627 |
+
video_path=video_path,
|
| 628 |
+
frame_rate=frame_rate,
|
| 629 |
+
backend=backend,
|
| 630 |
+
language=language,
|
| 631 |
+
generate_annotated_video=generate_annotated_video,
|
| 632 |
+
video_id=video_id,
|
| 633 |
+
status_callback=status_callback,
|
| 634 |
+
min_face_confidence=min_face_confidence,
|
| 635 |
+
min_face_size_ratio=min_face_size_ratio,
|
| 636 |
+
save_emotion_stats=save_emotion_stats,
|
| 637 |
+
skip_frames=skip_frames,
|
| 638 |
+
adaptive_sampling=adaptive_sampling,
|
| 639 |
+
analyze_eye_contact=analyze_eye_contact,
|
| 640 |
+
analyze_body_language=analyze_body_language,
|
| 641 |
+
analyze_face=analyze_face,
|
| 642 |
+
job_title=job_title,
|
| 643 |
+
model_name=model_name
|
| 644 |
+
)
|
behavior_backend/app/services/video_service.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from fastapi import UploadFile
|
| 3 |
+
from sqlalchemy.orm import Session
|
| 4 |
+
from typing import List, Dict, Any, Optional
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import ntpath
|
| 7 |
+
|
| 8 |
+
from app.db.repositories.video import VideoRepository
|
| 9 |
+
from app.db.repositories.results import ResultsRepository
|
| 10 |
+
from app.models.video import VideoMetadata, VideoAnalysisResponse
|
| 11 |
+
from app.utils.file_utils import save_upload_file, get_video_duration
|
| 12 |
+
from app.core.exceptions import VideoNotFoundError, VideoUploadError
|
| 13 |
+
|
| 14 |
+
class VideoService:
|
| 15 |
+
"""Service for video operations."""
|
| 16 |
+
|
| 17 |
+
def __init__(self, db: Session):
|
| 18 |
+
self.db = db
|
| 19 |
+
self.video_repo = VideoRepository(db)
|
| 20 |
+
self.results_repo = ResultsRepository(db)
|
| 21 |
+
|
| 22 |
+
async def upload_video(self, file: UploadFile) -> VideoMetadata:
|
| 23 |
+
"""
|
| 24 |
+
Upload a video file and save metadata to the database.
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
file: The uploaded file
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
VideoMetadata object
|
| 31 |
+
|
| 32 |
+
Raises:
|
| 33 |
+
VideoUploadError: If there is an error uploading the video
|
| 34 |
+
"""
|
| 35 |
+
if not file.filename:
|
| 36 |
+
raise VideoUploadError("No file provided")
|
| 37 |
+
|
| 38 |
+
try:
|
| 39 |
+
# Save the file and get video_id and path
|
| 40 |
+
video_id, upload_path = save_upload_file(file)
|
| 41 |
+
|
| 42 |
+
# Get file size and duration
|
| 43 |
+
file_size = os.path.getsize(upload_path)
|
| 44 |
+
duration = get_video_duration(str(upload_path))
|
| 45 |
+
|
| 46 |
+
# Create database entry
|
| 47 |
+
db_video = self.video_repo.create(
|
| 48 |
+
original_filename=file.filename,
|
| 49 |
+
file_path=str(upload_path),
|
| 50 |
+
size=file_size,
|
| 51 |
+
duration=duration
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
# Get filename for URL
|
| 55 |
+
filename = ntpath.basename(upload_path)
|
| 56 |
+
|
| 57 |
+
# Return metadata
|
| 58 |
+
return VideoMetadata(
|
| 59 |
+
video_id=db_video.id,
|
| 60 |
+
original_filename=db_video.original_filename,
|
| 61 |
+
upload_date=db_video.upload_date.isoformat(),
|
| 62 |
+
size=db_video.size,
|
| 63 |
+
duration=db_video.duration,
|
| 64 |
+
status=db_video.status,
|
| 65 |
+
video_url=f"/uploads/{filename}"
|
| 66 |
+
)
|
| 67 |
+
except Exception as e:
|
| 68 |
+
raise VideoUploadError(f"Error uploading video: {str(e)}")
|
| 69 |
+
|
| 70 |
+
def get_video_metadata(self, video_id: str) -> VideoMetadata:
|
| 71 |
+
"""
|
| 72 |
+
Get metadata for a video.
|
| 73 |
+
|
| 74 |
+
Args:
|
| 75 |
+
video_id: ID of the video
|
| 76 |
+
|
| 77 |
+
Returns:
|
| 78 |
+
VideoMetadata object
|
| 79 |
+
|
| 80 |
+
Raises:
|
| 81 |
+
VideoNotFoundError: If the video is not found
|
| 82 |
+
"""
|
| 83 |
+
db_video = self.video_repo.get_by_id(video_id)
|
| 84 |
+
|
| 85 |
+
if not db_video:
|
| 86 |
+
raise VideoNotFoundError(video_id)
|
| 87 |
+
|
| 88 |
+
# Get filename for URL
|
| 89 |
+
filename = ntpath.basename(db_video.file_path)
|
| 90 |
+
|
| 91 |
+
return VideoMetadata(
|
| 92 |
+
video_id=db_video.id,
|
| 93 |
+
original_filename=db_video.original_filename,
|
| 94 |
+
upload_date=db_video.upload_date.isoformat(),
|
| 95 |
+
size=db_video.size,
|
| 96 |
+
duration=db_video.duration,
|
| 97 |
+
status=db_video.status,
|
| 98 |
+
video_url=f"/uploads/{filename}"
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
def list_videos(self) -> List[VideoMetadata]:
|
| 102 |
+
"""
|
| 103 |
+
List all videos.
|
| 104 |
+
|
| 105 |
+
Returns:
|
| 106 |
+
List of VideoMetadata objects
|
| 107 |
+
"""
|
| 108 |
+
db_videos = self.video_repo.get_all()
|
| 109 |
+
|
| 110 |
+
return [VideoMetadata(
|
| 111 |
+
video_id=db_video.id,
|
| 112 |
+
original_filename=db_video.original_filename,
|
| 113 |
+
upload_date=db_video.upload_date.isoformat(),
|
| 114 |
+
size=db_video.size,
|
| 115 |
+
duration=db_video.duration,
|
| 116 |
+
status=db_video.status,
|
| 117 |
+
video_url=f"/uploads/{ntpath.basename(db_video.file_path)}"
|
| 118 |
+
) for db_video in db_videos]
|
| 119 |
+
|
| 120 |
+
def delete_video(self, video_id: str) -> bool:
|
| 121 |
+
"""
|
| 122 |
+
Delete a video and its file from the system.
|
| 123 |
+
|
| 124 |
+
Args:
|
| 125 |
+
video_id: ID of the video to delete
|
| 126 |
+
|
| 127 |
+
Returns:
|
| 128 |
+
bool: True if the video was deleted, False if it didn't exist
|
| 129 |
+
|
| 130 |
+
Raises:
|
| 131 |
+
VideoNotFoundError: If the video is not found
|
| 132 |
+
"""
|
| 133 |
+
# Get video metadata before deletion
|
| 134 |
+
db_video = self.video_repo.get_by_id(video_id)
|
| 135 |
+
|
| 136 |
+
if not db_video:
|
| 137 |
+
raise VideoNotFoundError(video_id)
|
| 138 |
+
|
| 139 |
+
# Store the file path for later deletion
|
| 140 |
+
file_path = db_video.file_path
|
| 141 |
+
|
| 142 |
+
# Delete processing results first (if they exist)
|
| 143 |
+
self.results_repo.delete_by_video_id(video_id)
|
| 144 |
+
|
| 145 |
+
# Delete from database
|
| 146 |
+
result = self.video_repo.delete(video_id)
|
| 147 |
+
|
| 148 |
+
# Delete the file if database deletion was successful
|
| 149 |
+
if result and file_path and os.path.exists(file_path):
|
| 150 |
+
try:
|
| 151 |
+
os.remove(file_path)
|
| 152 |
+
except Exception as e:
|
| 153 |
+
# Log the error but don't fail the operation
|
| 154 |
+
# The database entry is already deleted
|
| 155 |
+
print(f"Error deleting file {file_path}: {str(e)}")
|
| 156 |
+
|
| 157 |
+
return result
|
| 158 |
+
|
| 159 |
+
def store_processing_results(
|
| 160 |
+
self,
|
| 161 |
+
video_id: str,
|
| 162 |
+
transcript: str,
|
| 163 |
+
analysis_json: Dict[str, Any]
|
| 164 |
+
) -> bool:
|
| 165 |
+
"""
|
| 166 |
+
Store processing results in the database.
|
| 167 |
+
|
| 168 |
+
Args:
|
| 169 |
+
video_id: ID of the video
|
| 170 |
+
transcript: The transcript text
|
| 171 |
+
analysis_json: The analysis JSON
|
| 172 |
+
|
| 173 |
+
Returns:
|
| 174 |
+
bool: True if results were stored successfully
|
| 175 |
+
"""
|
| 176 |
+
try:
|
| 177 |
+
# Extract relevant data from the analysis JSON
|
| 178 |
+
emotion_analysis = analysis_json.get('Emotion Analysis', {})
|
| 179 |
+
overall_summary = analysis_json.get('Overall Summary', '')
|
| 180 |
+
transcript_analysis = analysis_json.get('Transcript Analysis', {})
|
| 181 |
+
recommendations = analysis_json.get('Recommendations', {})
|
| 182 |
+
|
| 183 |
+
# Extract additional data
|
| 184 |
+
body_language_analysis = analysis_json.get('Body Language Analysis', {})
|
| 185 |
+
eye_contact_analysis = analysis_json.get('Eye Contact Analysis', {})
|
| 186 |
+
|
| 187 |
+
# Raw data
|
| 188 |
+
body_language_data = analysis_json.get('body_language_analysis', {})
|
| 189 |
+
eye_contact_data = analysis_json.get('eye_contact_analysis', {})
|
| 190 |
+
face_analysis_data = analysis_json.get('face_analysis', {})
|
| 191 |
+
|
| 192 |
+
# Get existing results or create new
|
| 193 |
+
existing_results = self.results_repo.get_by_video_id(video_id)
|
| 194 |
+
|
| 195 |
+
if existing_results:
|
| 196 |
+
# Update existing results
|
| 197 |
+
self.results_repo.update(
|
| 198 |
+
video_id=video_id,
|
| 199 |
+
transcript=transcript,
|
| 200 |
+
emotion_analysis=emotion_analysis,
|
| 201 |
+
overall_summary=overall_summary,
|
| 202 |
+
transcript_analysis=transcript_analysis,
|
| 203 |
+
recommendations=recommendations,
|
| 204 |
+
body_language_analysis=body_language_analysis,
|
| 205 |
+
body_language_data=body_language_data,
|
| 206 |
+
eye_contact_analysis=eye_contact_analysis,
|
| 207 |
+
eye_contact_data=eye_contact_data,
|
| 208 |
+
face_analysis_data=face_analysis_data
|
| 209 |
+
)
|
| 210 |
+
else:
|
| 211 |
+
# Create new results
|
| 212 |
+
self.results_repo.create(
|
| 213 |
+
video_id=video_id,
|
| 214 |
+
transcript=transcript,
|
| 215 |
+
emotion_analysis=emotion_analysis,
|
| 216 |
+
overall_summary=overall_summary,
|
| 217 |
+
transcript_analysis=transcript_analysis,
|
| 218 |
+
recommendations=recommendations,
|
| 219 |
+
body_language_analysis=body_language_analysis,
|
| 220 |
+
body_language_data=body_language_data,
|
| 221 |
+
eye_contact_analysis=eye_contact_analysis,
|
| 222 |
+
eye_contact_data=eye_contact_data,
|
| 223 |
+
face_analysis_data=face_analysis_data
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
# Update video status
|
| 227 |
+
self.video_repo.update_status(video_id, "processed")
|
| 228 |
+
|
| 229 |
+
return True
|
| 230 |
+
except Exception as e:
|
| 231 |
+
print(f"Error storing processing results: {str(e)}")
|
| 232 |
+
return False
|
| 233 |
+
|
| 234 |
+
def get_processing_results(self, video_id: str) -> Optional[Dict[str, Any]]:
|
| 235 |
+
"""
|
| 236 |
+
Get processing results for a video.
|
| 237 |
+
|
| 238 |
+
Args:
|
| 239 |
+
video_id: ID of the video
|
| 240 |
+
|
| 241 |
+
Returns:
|
| 242 |
+
Optional[Dict[str, Any]]: Processing results or None if not found
|
| 243 |
+
"""
|
| 244 |
+
# Get results from database
|
| 245 |
+
db_results = self.results_repo.get_by_video_id(video_id)
|
| 246 |
+
|
| 247 |
+
if not db_results:
|
| 248 |
+
return None
|
| 249 |
+
|
| 250 |
+
# Format results
|
| 251 |
+
results = {
|
| 252 |
+
"transcript": db_results.transcript,
|
| 253 |
+
"emotion_analysis": db_results.emotion_analysis,
|
| 254 |
+
"overall_summary": db_results.overall_summary,
|
| 255 |
+
"transcript_analysis": db_results.transcript_analysis,
|
| 256 |
+
"recommendations": db_results.recommendations,
|
| 257 |
+
"body_language_analysis": db_results.body_language_analysis,
|
| 258 |
+
"eye_contact_analysis": db_results.eye_contact_analysis,
|
| 259 |
+
"face_analysis_data": db_results.face_analysis_data
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
return results
|
behavior_backend/app/utils/__init__.py
ADDED
|
File without changes
|
behavior_backend/app/utils/auth.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime, timedelta
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from jose import JWTError, jwt
|
| 4 |
+
from fastapi import Depends, HTTPException, status
|
| 5 |
+
from fastapi.security import OAuth2PasswordBearer, APIKeyHeader
|
| 6 |
+
from sqlalchemy.orm import Session
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
from app.core.config import settings
|
| 10 |
+
from app.db.base import get_db
|
| 11 |
+
from app.db.models import User
|
| 12 |
+
from app.models.token import TokenData
|
| 13 |
+
|
| 14 |
+
# OAuth2 scheme for token authentication
|
| 15 |
+
oauth2_scheme = OAuth2PasswordBearer(tokenUrl=f"{settings.API_V1_STR}/auth/login")
|
| 16 |
+
|
| 17 |
+
# API Key security scheme
|
| 18 |
+
API_KEY_NAME = "X-API-Key"
|
| 19 |
+
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
| 20 |
+
|
| 21 |
+
# Use API key from settings
|
| 22 |
+
async def get_api_key_user(
|
| 23 |
+
api_key: str = Depends(api_key_header),
|
| 24 |
+
) -> bool:
|
| 25 |
+
"""
|
| 26 |
+
Validate the API key from the request header.
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
api_key: The API key from the request header
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
bool: True if the API key is valid
|
| 33 |
+
|
| 34 |
+
Raises:
|
| 35 |
+
HTTPException: If the API key is invalid
|
| 36 |
+
"""
|
| 37 |
+
if not api_key:
|
| 38 |
+
raise HTTPException(
|
| 39 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 40 |
+
detail="API key required",
|
| 41 |
+
headers={"WWW-Authenticate": "ApiKey"},
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
if api_key != settings.API_KEY:
|
| 45 |
+
raise HTTPException(
|
| 46 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 47 |
+
detail="Invalid API key",
|
| 48 |
+
headers={"WWW-Authenticate": "ApiKey"},
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
return True
|
| 52 |
+
|
| 53 |
+
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
|
| 54 |
+
"""
|
| 55 |
+
Create a new JWT access token.
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
data: The data to encode in the token
|
| 59 |
+
expires_delta: Optional expiration time delta
|
| 60 |
+
|
| 61 |
+
Returns:
|
| 62 |
+
str: The encoded JWT token
|
| 63 |
+
"""
|
| 64 |
+
to_encode = data.copy()
|
| 65 |
+
|
| 66 |
+
# Set expiration time
|
| 67 |
+
if expires_delta:
|
| 68 |
+
expire = datetime.utcnow() + expires_delta
|
| 69 |
+
else:
|
| 70 |
+
# Use configured expiration time from settings
|
| 71 |
+
expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
|
| 72 |
+
|
| 73 |
+
to_encode.update({"exp": expire})
|
| 74 |
+
|
| 75 |
+
# Create the JWT token
|
| 76 |
+
encoded_jwt = jwt.encode(
|
| 77 |
+
to_encode,
|
| 78 |
+
settings.SECRET_KEY,
|
| 79 |
+
algorithm=settings.ALGORITHM
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
return encoded_jwt
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
async def get_current_user(
|
| 86 |
+
token: str = Depends(oauth2_scheme),
|
| 87 |
+
db: Session = Depends(get_db)
|
| 88 |
+
) -> User:
|
| 89 |
+
"""
|
| 90 |
+
Get the current authenticated user from the token.
|
| 91 |
+
|
| 92 |
+
Args:
|
| 93 |
+
token: The JWT token
|
| 94 |
+
db: Database session
|
| 95 |
+
|
| 96 |
+
Returns:
|
| 97 |
+
User: The authenticated user
|
| 98 |
+
|
| 99 |
+
Raises:
|
| 100 |
+
HTTPException: If authentication fails
|
| 101 |
+
"""
|
| 102 |
+
credentials_exception = HTTPException(
|
| 103 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 104 |
+
detail="Could not validate credentials",
|
| 105 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
try:
|
| 109 |
+
# Decode the JWT token
|
| 110 |
+
payload = jwt.decode(
|
| 111 |
+
token,
|
| 112 |
+
settings.SECRET_KEY,
|
| 113 |
+
algorithms=[settings.ALGORITHM]
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
# Extract user_id from token
|
| 117 |
+
user_id: str = payload.get("sub")
|
| 118 |
+
if user_id is None:
|
| 119 |
+
raise credentials_exception
|
| 120 |
+
|
| 121 |
+
token_data = TokenData(user_id=user_id)
|
| 122 |
+
except JWTError as e:
|
| 123 |
+
# Log the specific JWT error for debugging
|
| 124 |
+
print(f"JWT validation error: {str(e)}")
|
| 125 |
+
# If it's a signature verification failure, return a specific error
|
| 126 |
+
if "signature" in str(e).lower() or "invalid" in str(e).lower():
|
| 127 |
+
raise HTTPException(
|
| 128 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 129 |
+
detail="Invalid token signature",
|
| 130 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 131 |
+
)
|
| 132 |
+
raise credentials_exception
|
| 133 |
+
|
| 134 |
+
# Get the user from the database
|
| 135 |
+
user = db.query(User).filter(User.id == token_data.user_id).first()
|
| 136 |
+
if user is None:
|
| 137 |
+
print(f"User not found in database: {token_data.user_id}")
|
| 138 |
+
raise credentials_exception
|
| 139 |
+
|
| 140 |
+
# Check if token is expired
|
| 141 |
+
try:
|
| 142 |
+
exp = payload.get("exp")
|
| 143 |
+
if exp is None:
|
| 144 |
+
print(f"Token has no expiration: {user.id}")
|
| 145 |
+
raise credentials_exception
|
| 146 |
+
|
| 147 |
+
expiry_time = datetime.fromtimestamp(exp)
|
| 148 |
+
current_time = datetime.utcnow()
|
| 149 |
+
|
| 150 |
+
# Add detailed logging for token expiration
|
| 151 |
+
time_until_expiry = expiry_time - current_time
|
| 152 |
+
print(f"Token expiration check: current={current_time}, expiry={expiry_time}, seconds_remaining={time_until_expiry.total_seconds()}")
|
| 153 |
+
|
| 154 |
+
if expiry_time < current_time:
|
| 155 |
+
print(f"Token expired for user: {user.id}, expired at {expiry_time}")
|
| 156 |
+
raise HTTPException(
|
| 157 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 158 |
+
detail="Token expired",
|
| 159 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 160 |
+
)
|
| 161 |
+
except Exception as e:
|
| 162 |
+
print(f"Error checking token expiration: {str(e)}")
|
| 163 |
+
raise credentials_exception
|
| 164 |
+
|
| 165 |
+
# Check if user is active
|
| 166 |
+
if not user.is_active:
|
| 167 |
+
raise HTTPException(
|
| 168 |
+
status_code=status.HTTP_403_FORBIDDEN,
|
| 169 |
+
detail="Inactive user"
|
| 170 |
+
)
|
| 171 |
+
|
| 172 |
+
return user
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
async def get_current_active_user(
|
| 176 |
+
current_user: User = Depends(get_current_user)
|
| 177 |
+
) -> User:
|
| 178 |
+
"""
|
| 179 |
+
Get the current active user.
|
| 180 |
+
|
| 181 |
+
Args:
|
| 182 |
+
current_user: The current authenticated user
|
| 183 |
+
|
| 184 |
+
Returns:
|
| 185 |
+
User: The current active user
|
| 186 |
+
|
| 187 |
+
Raises:
|
| 188 |
+
HTTPException: If the user is inactive
|
| 189 |
+
"""
|
| 190 |
+
if not current_user.is_active:
|
| 191 |
+
raise HTTPException(
|
| 192 |
+
status_code=status.HTTP_403_FORBIDDEN,
|
| 193 |
+
detail="Inactive user"
|
| 194 |
+
)
|
| 195 |
+
return current_user
|
behavior_backend/app/utils/data_utils.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Dict, Any, List, Optional
|
| 6 |
+
|
| 7 |
+
# Fix import paths
|
| 8 |
+
try:
|
| 9 |
+
from app.utils.logging_utils import setup_logger
|
| 10 |
+
except ImportError:
|
| 11 |
+
# Try relative imports for running from project root
|
| 12 |
+
from behavior_backend.app.utils.logging_utils import setup_logger
|
| 13 |
+
|
| 14 |
+
# Configure logging
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
def json_to_dataframe(data: Dict[str, List[Dict[str, Any]]]) -> pd.DataFrame:
|
| 18 |
+
"""
|
| 19 |
+
Convert JSON emotion data to a pandas DataFrame.
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
data: Dictionary with backend name as key and list of results as value
|
| 23 |
+
|
| 24 |
+
Returns:
|
| 25 |
+
DataFrame with emotion data
|
| 26 |
+
"""
|
| 27 |
+
all_rows = []
|
| 28 |
+
|
| 29 |
+
for backend_name, results in data.items():
|
| 30 |
+
for result in results:
|
| 31 |
+
if not result:
|
| 32 |
+
continue
|
| 33 |
+
|
| 34 |
+
frame_index = result.get('frame_index', 0)
|
| 35 |
+
|
| 36 |
+
# Handle case where no faces were detected
|
| 37 |
+
if 'faces' not in result or not result['faces']:
|
| 38 |
+
# Check if there's a main_face or main_emotion to use
|
| 39 |
+
if 'main_face' in result and result['main_face']:
|
| 40 |
+
main_face = result['main_face']
|
| 41 |
+
emotion_scores = main_face.get('emotion', {})
|
| 42 |
+
dominant_emotion = main_face.get('dominant_emotion', 'unknown')
|
| 43 |
+
emotion_confidence = main_face.get('emotion_confidence', 0)
|
| 44 |
+
emotion_stable = main_face.get('emotion_stable', False)
|
| 45 |
+
|
| 46 |
+
row = {
|
| 47 |
+
'backend': backend_name,
|
| 48 |
+
'frame_index': frame_index,
|
| 49 |
+
'face_index': 0,
|
| 50 |
+
'dominant_emotion': dominant_emotion,
|
| 51 |
+
'emotion_confidence': emotion_confidence,
|
| 52 |
+
'emotion_stable': emotion_stable,
|
| 53 |
+
'emotion_scores': emotion_scores,
|
| 54 |
+
'face_box': main_face.get('face_box')
|
| 55 |
+
}
|
| 56 |
+
all_rows.append(row)
|
| 57 |
+
continue
|
| 58 |
+
elif 'main_emotion' in result and result['main_emotion']:
|
| 59 |
+
main_emotion = result['main_emotion']
|
| 60 |
+
dominant_emotion = main_emotion.get('emotion', 'unknown')
|
| 61 |
+
emotion_confidence = main_emotion.get('confidence', 0)
|
| 62 |
+
emotion_stable = main_emotion.get('stable', False)
|
| 63 |
+
|
| 64 |
+
# Create a simple emotion_scores dict with just the dominant emotion
|
| 65 |
+
emotion_scores = {dominant_emotion: emotion_confidence} if dominant_emotion != 'unknown' else {}
|
| 66 |
+
|
| 67 |
+
row = {
|
| 68 |
+
'backend': backend_name,
|
| 69 |
+
'frame_index': frame_index,
|
| 70 |
+
'face_index': 0,
|
| 71 |
+
'dominant_emotion': dominant_emotion,
|
| 72 |
+
'emotion_confidence': emotion_confidence,
|
| 73 |
+
'emotion_stable': emotion_stable,
|
| 74 |
+
'emotion_scores': emotion_scores,
|
| 75 |
+
'face_box': None
|
| 76 |
+
}
|
| 77 |
+
all_rows.append(row)
|
| 78 |
+
continue
|
| 79 |
+
else:
|
| 80 |
+
row = {
|
| 81 |
+
'backend': backend_name,
|
| 82 |
+
'frame_index': frame_index,
|
| 83 |
+
'face_index': 0,
|
| 84 |
+
'dominant_emotion': 'unknown',
|
| 85 |
+
'emotion_confidence': 0,
|
| 86 |
+
'emotion_stable': False,
|
| 87 |
+
'emotion_scores': {},
|
| 88 |
+
'face_box': None
|
| 89 |
+
}
|
| 90 |
+
all_rows.append(row)
|
| 91 |
+
continue
|
| 92 |
+
|
| 93 |
+
# Process each face in the frame
|
| 94 |
+
for face_index, face_data in enumerate(result['faces']):
|
| 95 |
+
# Get emotion scores directly
|
| 96 |
+
emotion_scores = face_data.get('emotion', {})
|
| 97 |
+
|
| 98 |
+
# Get dominant emotion directly if available, otherwise calculate it
|
| 99 |
+
dominant_emotion = face_data.get('dominant_emotion', 'unknown')
|
| 100 |
+
emotion_confidence = face_data.get('emotion_confidence', 0)
|
| 101 |
+
emotion_stable = face_data.get('emotion_stable', False)
|
| 102 |
+
|
| 103 |
+
# If dominant_emotion is not available, calculate it
|
| 104 |
+
if dominant_emotion == 'unknown' and emotion_scores:
|
| 105 |
+
max_score = -1
|
| 106 |
+
for emotion, score in emotion_scores.items():
|
| 107 |
+
if score > max_score:
|
| 108 |
+
max_score = score
|
| 109 |
+
dominant_emotion = emotion
|
| 110 |
+
emotion_confidence = max_score
|
| 111 |
+
|
| 112 |
+
row = {
|
| 113 |
+
'backend': backend_name,
|
| 114 |
+
'frame_index': frame_index,
|
| 115 |
+
'face_index': face_index,
|
| 116 |
+
'dominant_emotion': dominant_emotion,
|
| 117 |
+
'emotion_confidence': emotion_confidence,
|
| 118 |
+
'emotion_stable': emotion_stable,
|
| 119 |
+
'emotion_scores': emotion_scores,
|
| 120 |
+
'face_box': face_data.get('face_box')
|
| 121 |
+
}
|
| 122 |
+
all_rows.append(row)
|
| 123 |
+
|
| 124 |
+
# Create DataFrame
|
| 125 |
+
if not all_rows:
|
| 126 |
+
logger.warning("No data to convert to DataFrame")
|
| 127 |
+
return pd.DataFrame()
|
| 128 |
+
|
| 129 |
+
df = pd.DataFrame(all_rows)
|
| 130 |
+
return df
|
| 131 |
+
|
| 132 |
+
def calculate_emotion_percentages(df: pd.DataFrame) -> Dict[str, float]:
|
| 133 |
+
"""
|
| 134 |
+
Calculate percentages of different emotion categories.
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
df: DataFrame with emotion data
|
| 138 |
+
|
| 139 |
+
Returns:
|
| 140 |
+
Dictionary with emotion percentages
|
| 141 |
+
"""
|
| 142 |
+
if df.empty:
|
| 143 |
+
return {
|
| 144 |
+
"angry": 0,
|
| 145 |
+
"disgust": 0,
|
| 146 |
+
"fear": 0,
|
| 147 |
+
"happy": 0,
|
| 148 |
+
"sad": 0,
|
| 149 |
+
"surprise": 0,
|
| 150 |
+
"neutral": 0,
|
| 151 |
+
"positive": 0,
|
| 152 |
+
"negative": 0
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
# Define emotion categories
|
| 156 |
+
positive_emotions = ['happy', 'surprise']
|
| 157 |
+
negative_emotions = ['angry', 'disgust', 'fear', 'sad']
|
| 158 |
+
neutral_emotions = ['neutral']
|
| 159 |
+
all_emotions = positive_emotions + negative_emotions + neutral_emotions
|
| 160 |
+
|
| 161 |
+
# Initialize counters for individual emotions
|
| 162 |
+
emotion_counts = {emotion: 0 for emotion in all_emotions}
|
| 163 |
+
total_frames = len(df)
|
| 164 |
+
|
| 165 |
+
# Count frames by emotion
|
| 166 |
+
for emotion in df['dominant_emotion']:
|
| 167 |
+
if emotion in emotion_counts:
|
| 168 |
+
emotion_counts[emotion] += 1
|
| 169 |
+
|
| 170 |
+
# Calculate individual emotion percentages
|
| 171 |
+
emotion_percentages = {
|
| 172 |
+
emotion: round((count / total_frames) * 100, 2)
|
| 173 |
+
for emotion, count in emotion_counts.items()
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
# Calculate grouped percentages
|
| 177 |
+
positive_count = sum(emotion_counts[emotion] for emotion in positive_emotions)
|
| 178 |
+
negative_count = sum(emotion_counts[emotion] for emotion in negative_emotions)
|
| 179 |
+
neutral_count = sum(emotion_counts[emotion] for emotion in neutral_emotions)
|
| 180 |
+
|
| 181 |
+
# Add grouped percentages
|
| 182 |
+
emotion_percentages.update({
|
| 183 |
+
"positive": round((positive_count / total_frames) * 100, 2) if total_frames > 0 else 0,
|
| 184 |
+
"negative": round((negative_count / total_frames) * 100, 2) if total_frames > 0 else 0,
|
| 185 |
+
"neutral_group": round((neutral_count / total_frames) * 100, 2) if total_frames > 0 else 0
|
| 186 |
+
})
|
| 187 |
+
|
| 188 |
+
# Calculate confidence values if available
|
| 189 |
+
if 'emotion_confidence' in df.columns:
|
| 190 |
+
confidence_by_emotion = {}
|
| 191 |
+
for emotion in all_emotions:
|
| 192 |
+
emotion_df = df[df['dominant_emotion'] == emotion]
|
| 193 |
+
if not emotion_df.empty:
|
| 194 |
+
confidence_by_emotion[emotion] = round(emotion_df['emotion_confidence'].mean(), 2)
|
| 195 |
+
else:
|
| 196 |
+
confidence_by_emotion[emotion] = 0
|
| 197 |
+
|
| 198 |
+
# Calculate average confidence across all emotions
|
| 199 |
+
avg_confidence = round(df['emotion_confidence'].mean(), 2)
|
| 200 |
+
|
| 201 |
+
# Add confidence data to results
|
| 202 |
+
emotion_percentages["average_confidence"] = avg_confidence
|
| 203 |
+
emotion_percentages["confidence_by_emotion"] = confidence_by_emotion
|
| 204 |
+
|
| 205 |
+
return emotion_percentages
|
| 206 |
+
|
| 207 |
+
def format_results_for_api(
|
| 208 |
+
emotion_df: Optional[pd.DataFrame],
|
| 209 |
+
transcript: str,
|
| 210 |
+
analysis: Dict[str, Any]
|
| 211 |
+
) -> Dict[str, Any]:
|
| 212 |
+
"""
|
| 213 |
+
Format results for API response.
|
| 214 |
+
|
| 215 |
+
Args:
|
| 216 |
+
emotion_df: DataFrame with emotion data
|
| 217 |
+
transcript: Transcript text
|
| 218 |
+
analysis: Analysis data
|
| 219 |
+
|
| 220 |
+
Returns:
|
| 221 |
+
Formatted results dictionary
|
| 222 |
+
"""
|
| 223 |
+
if emotion_df is None or emotion_df.empty:
|
| 224 |
+
emotion_percentages = {
|
| 225 |
+
"angry": 0,
|
| 226 |
+
"disgust": 0,
|
| 227 |
+
"fear": 0,
|
| 228 |
+
"happy": 0,
|
| 229 |
+
"sad": 0,
|
| 230 |
+
"surprise": 0,
|
| 231 |
+
"neutral": 0,
|
| 232 |
+
"positive": 0,
|
| 233 |
+
"negative": 0,
|
| 234 |
+
"neutral_group": 0,
|
| 235 |
+
"average_confidence": 0,
|
| 236 |
+
"confidence_by_emotion": {
|
| 237 |
+
"angry": 0,
|
| 238 |
+
"disgust": 0,
|
| 239 |
+
"fear": 0,
|
| 240 |
+
"happy": 0,
|
| 241 |
+
"sad": 0,
|
| 242 |
+
"surprise": 0,
|
| 243 |
+
"neutral": 0
|
| 244 |
+
}
|
| 245 |
+
}
|
| 246 |
+
else:
|
| 247 |
+
emotion_percentages = calculate_emotion_percentages(emotion_df)
|
| 248 |
+
|
| 249 |
+
# Ensure we have confidence data with the right structure for the frontend
|
| 250 |
+
if "confidence_by_emotion" not in emotion_percentages:
|
| 251 |
+
emotion_percentages["confidence_by_emotion"] = {
|
| 252 |
+
"angry": 0,
|
| 253 |
+
"disgust": 0,
|
| 254 |
+
"fear": 0,
|
| 255 |
+
"happy": 0,
|
| 256 |
+
"sad": 0,
|
| 257 |
+
"surprise": 0,
|
| 258 |
+
"neutral": 0
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
if "average_confidence" not in emotion_percentages:
|
| 262 |
+
emotion_percentages["average_confidence"] = 0
|
| 263 |
+
|
| 264 |
+
return {
|
| 265 |
+
"transcript": transcript,
|
| 266 |
+
"emotion_percentages": emotion_percentages,
|
| 267 |
+
"analysis": analysis
|
| 268 |
+
}
|
behavior_backend/app/utils/device_utils.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import time
|
| 3 |
+
import functools
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import psutil
|
| 7 |
+
import gc
|
| 8 |
+
try:
|
| 9 |
+
from app.utils.logging_utils import setup_logger
|
| 10 |
+
except ImportError:
|
| 11 |
+
# Try relative imports for running from project root
|
| 12 |
+
from behavior_backend.app.utils.logging_utils import setup_logger
|
| 13 |
+
|
| 14 |
+
# Configure logging
|
| 15 |
+
logger = setup_logger(__name__)
|
| 16 |
+
|
| 17 |
+
def get_system_memory_info():
|
| 18 |
+
"""
|
| 19 |
+
Get system memory information.
|
| 20 |
+
|
| 21 |
+
Returns:
|
| 22 |
+
dict: Memory information
|
| 23 |
+
"""
|
| 24 |
+
memory = psutil.virtual_memory()
|
| 25 |
+
return {
|
| 26 |
+
"total": memory.total / (1024 ** 3), # GB
|
| 27 |
+
"available": memory.available / (1024 ** 3), # GB
|
| 28 |
+
"percent_used": memory.percent,
|
| 29 |
+
"process_usage": psutil.Process(os.getpid()).memory_info().rss / (1024 ** 3) # GB
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
def log_memory_usage(message=""):
|
| 33 |
+
"""
|
| 34 |
+
Log current memory usage.
|
| 35 |
+
|
| 36 |
+
Args:
|
| 37 |
+
message: Optional message to include in the log
|
| 38 |
+
"""
|
| 39 |
+
mem_info = get_system_memory_info()
|
| 40 |
+
logger.info(f"Memory usage {message}: "
|
| 41 |
+
f"Total: {mem_info['total']:.2f}GB, "
|
| 42 |
+
f"Available: {mem_info['available']:.2f}GB, "
|
| 43 |
+
f"Used: {mem_info['percent_used']}%, "
|
| 44 |
+
f"Process: {mem_info['process_usage']:.2f}GB")
|
| 45 |
+
|
| 46 |
+
def get_available_device():
|
| 47 |
+
"""
|
| 48 |
+
Determine the best available device with proper error handling.
|
| 49 |
+
|
| 50 |
+
Returns:
|
| 51 |
+
str: 'cuda', 'mps', or 'cpu' depending on availability
|
| 52 |
+
"""
|
| 53 |
+
logger.info("=== GPU DETECTION ===")
|
| 54 |
+
|
| 55 |
+
# Check available memory first
|
| 56 |
+
mem_info = get_system_memory_info()
|
| 57 |
+
if mem_info['available'] < 2.0: # Less than 2GB available
|
| 58 |
+
logger.warning(f"Low system memory: {mem_info['available']:.2f}GB available. Forcing CPU usage.")
|
| 59 |
+
return "cpu"
|
| 60 |
+
|
| 61 |
+
# First try CUDA (NVIDIA GPUs)
|
| 62 |
+
if torch.cuda.is_available():
|
| 63 |
+
try:
|
| 64 |
+
# Simplified CUDA test with better error handling
|
| 65 |
+
logger.info("CUDA detected - attempting verification")
|
| 66 |
+
# Use a smaller and simpler operation
|
| 67 |
+
test_tensor = torch.tensor([1.0], device="cuda")
|
| 68 |
+
test_tensor = test_tensor + 1.0 # Simple operation
|
| 69 |
+
result = test_tensor.item() # Get the value back to validate operation
|
| 70 |
+
|
| 71 |
+
# If we get here, the CUDA operation worked
|
| 72 |
+
test_tensor = test_tensor.cpu() # Move back to CPU to free CUDA memory
|
| 73 |
+
torch.cuda.empty_cache() # Clear CUDA cache
|
| 74 |
+
logger.info(f" NVIDIA GPU (CUDA) detected and verified working (test result: {result})")
|
| 75 |
+
return "cuda"
|
| 76 |
+
except Exception as e:
|
| 77 |
+
logger.warning(f"CUDA detected but test failed: {e}")
|
| 78 |
+
torch.cuda.empty_cache() # Clear CUDA cache
|
| 79 |
+
|
| 80 |
+
# Then try MPS (Apple Silicon)
|
| 81 |
+
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
|
| 82 |
+
try:
|
| 83 |
+
# Test MPS with a small operation
|
| 84 |
+
test_tensor = torch.zeros(1).to('mps')
|
| 85 |
+
test_tensor = test_tensor + 1
|
| 86 |
+
test_tensor.cpu() # Move back to CPU to free MPS memory
|
| 87 |
+
logger.info(" Apple Silicon GPU (MPS) detected and verified working")
|
| 88 |
+
return "mps"
|
| 89 |
+
except Exception as e:
|
| 90 |
+
logger.warning(f" MPS detected but test failed: {e}")
|
| 91 |
+
|
| 92 |
+
# Fall back to CPU
|
| 93 |
+
logger.info(" No GPU detected or all GPU tests failed, using CPU")
|
| 94 |
+
return "cpu"
|
| 95 |
+
|
| 96 |
+
def run_on_device(func):
|
| 97 |
+
"""
|
| 98 |
+
Decorator to run a function on the best available device.
|
| 99 |
+
|
| 100 |
+
Args:
|
| 101 |
+
func: The function to decorate
|
| 102 |
+
|
| 103 |
+
Returns:
|
| 104 |
+
A wrapped function that runs on the best available device
|
| 105 |
+
"""
|
| 106 |
+
@functools.wraps(func)
|
| 107 |
+
def wrapper(*args, **kwargs):
|
| 108 |
+
# Log memory before operation
|
| 109 |
+
log_memory_usage(f"before {func.__name__}")
|
| 110 |
+
|
| 111 |
+
# Force garbage collection before operation
|
| 112 |
+
gc.collect()
|
| 113 |
+
|
| 114 |
+
# Get device if not already specified
|
| 115 |
+
device = get_available_device()
|
| 116 |
+
|
| 117 |
+
# Add device to kwargs if not already present
|
| 118 |
+
if 'device' not in kwargs:
|
| 119 |
+
kwargs['device'] = device
|
| 120 |
+
|
| 121 |
+
try:
|
| 122 |
+
start_time = time.time()
|
| 123 |
+
result = func(*args, **kwargs)
|
| 124 |
+
end_time = time.time()
|
| 125 |
+
|
| 126 |
+
logger.debug(f"Function {func.__name__} ran on {device} in {end_time - start_time:.4f} seconds")
|
| 127 |
+
return result
|
| 128 |
+
except Exception as e:
|
| 129 |
+
# Check if this is the SparseMPS error
|
| 130 |
+
if "SparseMPS" in str(e) and device == "mps":
|
| 131 |
+
logger.warning(f"MPS error detected: {e}")
|
| 132 |
+
logger.warning("Falling back to CPU for this operation")
|
| 133 |
+
|
| 134 |
+
# Update device to CPU and retry
|
| 135 |
+
kwargs['device'] = 'cpu'
|
| 136 |
+
|
| 137 |
+
# Force garbage collection before retry
|
| 138 |
+
gc.collect()
|
| 139 |
+
|
| 140 |
+
start_time = time.time()
|
| 141 |
+
result = func(*args, **kwargs)
|
| 142 |
+
end_time = time.time()
|
| 143 |
+
|
| 144 |
+
logger.debug(f"Function {func.__name__} ran on CPU (fallback) in {end_time - start_time:.4f} seconds")
|
| 145 |
+
return result
|
| 146 |
+
else:
|
| 147 |
+
# Re-raise other exceptions
|
| 148 |
+
raise
|
| 149 |
+
finally:
|
| 150 |
+
# Force garbage collection after operation
|
| 151 |
+
gc.collect()
|
| 152 |
+
if device == 'cuda':
|
| 153 |
+
torch.cuda.empty_cache()
|
| 154 |
+
|
| 155 |
+
# Log memory after operation
|
| 156 |
+
log_memory_usage(f"after {func.__name__}")
|
| 157 |
+
|
| 158 |
+
return wrapper
|
| 159 |
+
|
| 160 |
+
# Initialize device once at module level
|
| 161 |
+
device = get_available_device()
|
behavior_backend/app/utils/file_utils.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import cv2
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from fastapi import UploadFile
|
| 6 |
+
import uuid
|
| 7 |
+
|
| 8 |
+
from app.core.config import settings
|
| 9 |
+
|
| 10 |
+
def save_upload_file(file: UploadFile) -> tuple[str, Path]:
|
| 11 |
+
"""
|
| 12 |
+
Save an uploaded file to the upload directory.
|
| 13 |
+
|
| 14 |
+
Args:
|
| 15 |
+
file: The uploaded file
|
| 16 |
+
|
| 17 |
+
Returns:
|
| 18 |
+
Tuple of (video_id, file_path)
|
| 19 |
+
"""
|
| 20 |
+
# Generate unique ID for the video
|
| 21 |
+
video_id = str(uuid.uuid4())
|
| 22 |
+
file_extension = os.path.splitext(file.filename)[1] if file.filename else ""
|
| 23 |
+
|
| 24 |
+
# Save the uploaded file
|
| 25 |
+
upload_path = settings.UPLOAD_DIR / f"{video_id}{file_extension}"
|
| 26 |
+
with open(upload_path, "wb") as buffer:
|
| 27 |
+
shutil.copyfileobj(file.file, buffer)
|
| 28 |
+
|
| 29 |
+
return video_id, upload_path
|
| 30 |
+
|
| 31 |
+
def get_video_duration(video_path: str) -> float:
|
| 32 |
+
"""
|
| 33 |
+
Extract video duration using OpenCV.
|
| 34 |
+
|
| 35 |
+
Args:
|
| 36 |
+
video_path: Path to the video file
|
| 37 |
+
|
| 38 |
+
Returns:
|
| 39 |
+
Duration of the video in seconds
|
| 40 |
+
"""
|
| 41 |
+
cap = cv2.VideoCapture(video_path)
|
| 42 |
+
if not cap.isOpened():
|
| 43 |
+
return 0.0 # Default to 0 if video cannot be opened
|
| 44 |
+
|
| 45 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 46 |
+
frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
|
| 47 |
+
cap.release()
|
| 48 |
+
|
| 49 |
+
return frame_count / fps if fps > 0 else 0.0
|
behavior_backend/app/utils/logging_utils.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import time
|
| 3 |
+
import csv
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
|
| 8 |
+
"""
|
| 9 |
+
Logging Utilities Module
|
| 10 |
+
========================
|
| 11 |
+
|
| 12 |
+
This module provides enhanced logging capabilities for the behavior analytics application.
|
| 13 |
+
Features include:
|
| 14 |
+
- Emoji-enhanced log messages for better visual identification
|
| 15 |
+
- Daily log file rotation with date-based filenames
|
| 16 |
+
- Dual logging to both console and files
|
| 17 |
+
- Performance timing and measurement utilities
|
| 18 |
+
- Custom log levels and formatting
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
------
|
| 22 |
+
# Basic setup with both console and file logging
|
| 23 |
+
logger = setup_logger("my_module")
|
| 24 |
+
|
| 25 |
+
# Log at different levels with automatic emoji inclusion
|
| 26 |
+
logger.debug("Debugging information")
|
| 27 |
+
logger.info("General information")
|
| 28 |
+
logger.warning("Warning message")
|
| 29 |
+
logger.error("Error occurred")
|
| 30 |
+
logger.critical("Critical failure")
|
| 31 |
+
|
| 32 |
+
# Log success messages with checkmark emoji
|
| 33 |
+
log_success(logger, "Operation completed successfully")
|
| 34 |
+
|
| 35 |
+
# Measure function execution time
|
| 36 |
+
@time_it
|
| 37 |
+
def my_function():
|
| 38 |
+
# Function code here
|
| 39 |
+
pass
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
# Configure logging
|
| 43 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
| 44 |
+
logger = logging.getLogger(__name__)
|
| 45 |
+
|
| 46 |
+
# Create logs directory if it doesn't exist
|
| 47 |
+
logs_dir = Path("logs")
|
| 48 |
+
logs_dir.mkdir(exist_ok=True)
|
| 49 |
+
|
| 50 |
+
# Time logs file
|
| 51 |
+
TIME_LOGS_FILE = logs_dir / "time_logs.csv"
|
| 52 |
+
|
| 53 |
+
# Emoji mappings for different log levels
|
| 54 |
+
LOG_EMOJIS = {
|
| 55 |
+
'DEBUG': 'debug',
|
| 56 |
+
'INFO': 'info',
|
| 57 |
+
'WARNING': 'warning',
|
| 58 |
+
'ERROR': 'error',
|
| 59 |
+
'CRITICAL': 'critical',
|
| 60 |
+
'SUCCESS': 'success',
|
| 61 |
+
'TIMER': 'timer'
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
def get_daily_log_filename(base_name="app"):
|
| 65 |
+
"""
|
| 66 |
+
Generate a log filename with the current date.
|
| 67 |
+
|
| 68 |
+
The function creates a filename in the format: {base_name}_{YYYY-MM-DD}.log
|
| 69 |
+
This ensures that logs are automatically separated by day, making it easier
|
| 70 |
+
to manage log files and implement log rotation policies.
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
base_name (str): Base name for the log file, defaults to "app"
|
| 74 |
+
|
| 75 |
+
Returns:
|
| 76 |
+
Path: Path object for the log file with current date
|
| 77 |
+
|
| 78 |
+
Example:
|
| 79 |
+
>>> get_daily_log_filename("api")
|
| 80 |
+
Path('logs/api_2023-11-15.log')
|
| 81 |
+
"""
|
| 82 |
+
today = datetime.now().strftime("%Y-%m-%d")
|
| 83 |
+
return logs_dir / f"{base_name}_{today}.log"
|
| 84 |
+
|
| 85 |
+
def setup_logger(name, log_file=None, level=logging.INFO, enable_console=True, enable_file=True):
|
| 86 |
+
"""
|
| 87 |
+
Set up a logger with file and console handlers.
|
| 88 |
+
|
| 89 |
+
This function configures a logger with the specified name and adds handlers for
|
| 90 |
+
console output and/or file output based on the parameters. It also adds emoji
|
| 91 |
+
support to make logs more visually informative.
|
| 92 |
+
|
| 93 |
+
Args:
|
| 94 |
+
name (str): Logger name, typically the module name using __name__
|
| 95 |
+
log_file (str, optional): Path to log file. If None and enable_file is True,
|
| 96 |
+
a daily log file will be used. Defaults to None.
|
| 97 |
+
level (int): Logging level (e.g., logging.INFO, logging.DEBUG).
|
| 98 |
+
Defaults to logging.INFO.
|
| 99 |
+
enable_console (bool): Whether to enable console logging. Defaults to True.
|
| 100 |
+
enable_file (bool): Whether to enable file logging. Defaults to True.
|
| 101 |
+
|
| 102 |
+
Returns:
|
| 103 |
+
logging.Logger: Configured logger instance
|
| 104 |
+
|
| 105 |
+
Example:
|
| 106 |
+
>>> # Basic usage with both console and file logging
|
| 107 |
+
>>> logger = setup_logger("my_module")
|
| 108 |
+
>>>
|
| 109 |
+
>>> # Console only (no file logging)
|
| 110 |
+
>>> logger = setup_logger("console_only", enable_file=False)
|
| 111 |
+
>>>
|
| 112 |
+
>>> # File only with custom file path
|
| 113 |
+
>>> logger = setup_logger("file_only", log_file="custom.log", enable_console=False)
|
| 114 |
+
"""
|
| 115 |
+
logger = logging.getLogger(name)
|
| 116 |
+
logger.setLevel(level)
|
| 117 |
+
|
| 118 |
+
# Remove existing handlers if any
|
| 119 |
+
for handler in logger.handlers[:]:
|
| 120 |
+
logger.removeHandler(handler)
|
| 121 |
+
|
| 122 |
+
# Format with emojis
|
| 123 |
+
log_format = '%(asctime)s - %(name)s - %(emoji)s %(levelname)s - %(message)s'
|
| 124 |
+
|
| 125 |
+
# Create a filter to add emoji to the record
|
| 126 |
+
class EmojiFilter(logging.Filter):
|
| 127 |
+
"""
|
| 128 |
+
Filter that adds an emoji field to the log record based on the log level.
|
| 129 |
+
|
| 130 |
+
This filter enriches log records with emojis corresponding to their log levels,
|
| 131 |
+
making logs more visually informative and easier to scan.
|
| 132 |
+
"""
|
| 133 |
+
def filter(self, record):
|
| 134 |
+
"""
|
| 135 |
+
Add an emoji attribute to the log record.
|
| 136 |
+
|
| 137 |
+
Args:
|
| 138 |
+
record (LogRecord): The log record to filter
|
| 139 |
+
|
| 140 |
+
Returns:
|
| 141 |
+
bool: Always returns True to include the record
|
| 142 |
+
"""
|
| 143 |
+
record.emoji = LOG_EMOJIS.get(record.levelname, '')
|
| 144 |
+
return True
|
| 145 |
+
|
| 146 |
+
# Create console handler
|
| 147 |
+
if enable_console:
|
| 148 |
+
console_handler = logging.StreamHandler()
|
| 149 |
+
console_handler.setLevel(level)
|
| 150 |
+
console_formatter = logging.Formatter(log_format)
|
| 151 |
+
console_handler.setFormatter(console_formatter)
|
| 152 |
+
console_handler.addFilter(EmojiFilter())
|
| 153 |
+
logger.addHandler(console_handler)
|
| 154 |
+
|
| 155 |
+
# Create file handler
|
| 156 |
+
if enable_file:
|
| 157 |
+
# Use provided log_file or generate daily log file
|
| 158 |
+
file_path = log_file if log_file else get_daily_log_filename()
|
| 159 |
+
file_handler = logging.FileHandler(file_path)
|
| 160 |
+
file_handler.setLevel(level)
|
| 161 |
+
file_formatter = logging.Formatter(log_format)
|
| 162 |
+
file_handler.setFormatter(file_formatter)
|
| 163 |
+
file_handler.addFilter(EmojiFilter())
|
| 164 |
+
logger.addHandler(file_handler)
|
| 165 |
+
|
| 166 |
+
return logger
|
| 167 |
+
|
| 168 |
+
def log_time(function_name, time_taken):
|
| 169 |
+
"""
|
| 170 |
+
Log the time taken by a function to a CSV file and to the logger.
|
| 171 |
+
|
| 172 |
+
This function records performance metrics for functions, storing them in a CSV file
|
| 173 |
+
for later analysis and also outputting them to the log with a timer emoji.
|
| 174 |
+
|
| 175 |
+
Args:
|
| 176 |
+
function_name (str): Name of the function being timed
|
| 177 |
+
time_taken (float): Time taken in seconds
|
| 178 |
+
|
| 179 |
+
Example:
|
| 180 |
+
>>> log_time("process_video", 2.345)
|
| 181 |
+
# Writes to CSV and logs: "⏱️ Function process_video took 2.3450 seconds"
|
| 182 |
+
"""
|
| 183 |
+
# Create file with headers if it doesn't exist
|
| 184 |
+
if not os.path.exists(TIME_LOGS_FILE):
|
| 185 |
+
with open(TIME_LOGS_FILE, 'w', newline='') as f:
|
| 186 |
+
writer = csv.writer(f)
|
| 187 |
+
writer.writerow(['timestamp', 'function', 'time_taken_seconds'])
|
| 188 |
+
|
| 189 |
+
# Append time log
|
| 190 |
+
with open(TIME_LOGS_FILE, 'a', newline='') as f:
|
| 191 |
+
writer = csv.writer(f)
|
| 192 |
+
writer.writerow([datetime.now().isoformat(), function_name, time_taken])
|
| 193 |
+
|
| 194 |
+
logger.info(f"{LOG_EMOJIS['TIMER']} Function {function_name} took {time_taken:.4f} seconds")
|
| 195 |
+
|
| 196 |
+
def time_it(func):
|
| 197 |
+
"""
|
| 198 |
+
Decorator to measure and log the execution time of a function.
|
| 199 |
+
|
| 200 |
+
This decorator wraps a function to measure its execution time and automatically
|
| 201 |
+
log the results using the log_time function. It's a convenient way to add
|
| 202 |
+
performance monitoring to any function.
|
| 203 |
+
|
| 204 |
+
Args:
|
| 205 |
+
func (callable): The function to decorate
|
| 206 |
+
|
| 207 |
+
Returns:
|
| 208 |
+
callable: A wrapped function that logs its execution time
|
| 209 |
+
|
| 210 |
+
Example:
|
| 211 |
+
>>> @time_it
|
| 212 |
+
>>> def process_data(data):
|
| 213 |
+
>>> # Process data here
|
| 214 |
+
>>> return result
|
| 215 |
+
>>>
|
| 216 |
+
>>> # When called, will automatically log execution time
|
| 217 |
+
>>> result = process_data(my_data)
|
| 218 |
+
"""
|
| 219 |
+
def wrapper(*args, **kwargs):
|
| 220 |
+
"""
|
| 221 |
+
Wrapper function that times the execution of the decorated function.
|
| 222 |
+
|
| 223 |
+
Args:
|
| 224 |
+
*args: Arguments to pass to the decorated function
|
| 225 |
+
**kwargs: Keyword arguments to pass to the decorated function
|
| 226 |
+
|
| 227 |
+
Returns:
|
| 228 |
+
Any: The return value of the decorated function
|
| 229 |
+
"""
|
| 230 |
+
start_time = time.time()
|
| 231 |
+
result = func(*args, **kwargs)
|
| 232 |
+
end_time = time.time()
|
| 233 |
+
time_taken = end_time - start_time
|
| 234 |
+
log_time(func.__name__, time_taken)
|
| 235 |
+
return result
|
| 236 |
+
return wrapper
|
| 237 |
+
|
| 238 |
+
def log_success(logger, message, *args, **kwargs):
|
| 239 |
+
"""
|
| 240 |
+
Log a success message with a checkmark emoji.
|
| 241 |
+
|
| 242 |
+
This function provides a convenient way to log successful operations with
|
| 243 |
+
a distinctive checkmark emoji, making success messages stand out in the logs.
|
| 244 |
+
|
| 245 |
+
Args:
|
| 246 |
+
logger (logging.Logger): Logger instance to use
|
| 247 |
+
message (str): Message to log
|
| 248 |
+
*args: Additional positional arguments for logger.info
|
| 249 |
+
**kwargs: Additional keyword arguments for logger.info
|
| 250 |
+
|
| 251 |
+
Example:
|
| 252 |
+
>>> logger = setup_logger("my_module")
|
| 253 |
+
>>> log_success(logger, "User registration completed for user_id={}", user_id)
|
| 254 |
+
# Logs: "✅ User registration completed for user_id=123"
|
| 255 |
+
"""
|
| 256 |
+
logger.info(f"{LOG_EMOJIS['SUCCESS']} {message}", *args, **kwargs)
|