pykara committed on
Commit
3ee98d5
·
0 Parent(s):

Initial Py-Detect backend with FAISS indexes

Browse files
.dockerignore ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VCS/metadata
2
+ .git
3
+ .gitignore
4
+
5
+ # IDE
6
+ .vscode
7
+ .idea
8
+
9
+ # Python caches
10
+ __pycache__/
11
+ *.py[cod]
12
+ *.log
13
+
14
+ # Virtual envs
15
+ .venv/
16
+ env/
17
+ ENV/
18
+
19
+ # OS junk
20
+ .DS_Store
21
+
22
+ # Build/output
23
+ dist/
24
+ build/
25
+ .cache/
26
+
27
+ # Local env files (do not send secrets into the image)
28
+ .env
29
+ .env.local
30
+ .env.*.local
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.faiss filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ *.egg-info/
7
+ .eggs/
8
+ .build/
9
+ dist/
10
+ .cache/
11
+
12
+ # Virtual envs
13
+ .venv/
14
+ venv/
15
+ ENV/
16
+ env/
17
+
18
+ # OS / IDE
19
+ .DS_Store
20
+ Thumbs.db
21
+ .idea/
22
+ .vscode/
23
+
24
+ # Environment files (keep .env.example tracked)
25
+ .env
26
+ .env.local
27
+ .env.*.local
28
+
29
+ # Logs
30
+ *.log
31
+
32
+ # Test/Tool caches
33
+ .pytest_cache/
34
+ .mypy_cache/
35
+ .ruff_cache/
36
+ .ipynb_checkpoints/
37
+
38
+ # Coverage / tox / hypothesis
39
+ .coverage
40
+ htmlcov/
41
+ .tox/
42
+ .hypothesis/
43
+
44
+ # SQLite (if any)
45
+ *.sqlite3
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# System packages for pyodbc, OpenCV, etc.
# - build-essential: compiles wheels with no prebuilt binary for this platform
# - unixodbc-dev: required by pyodbc
# - ffmpeg/libsm6/libxext6: runtime libraries needed by opencv-python
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    unixodbc-dev \
    ffmpeg \
    libsm6 \
    libxext6 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies first so this layer stays cached unless
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the backend code and data files
COPY . .

# Hugging Face Spaces routes HTTP traffic to port 7860.
EXPOSE 7860

# Start the Flask app
CMD ["python", "app.py"]
Manual on Investigative Interviewing for Criminal Investigation.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e62079a636abf5b777b4864c1485649c3c5be34212c5b6656e8421686de064a
3
+ size 247341
Manual on Investigative Interviewing for Criminal Investigation.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017e508fdc2c5e72c496528ef49488411d133b9901b43bd7717868be66ccbe3e
3
+ size 164127
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Py Detect Backend
3
+ emoji: ⚡
4
+ colorFrom: pink
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,1240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import base64
4
+ import uuid
5
+ import pickle
6
+ import requests
7
+ import pyodbc
8
+ import faiss
9
+ import numpy as np
10
+ import cv2
11
+ from mtcnn import MTCNN
12
+ from fer import FER
13
+ from dotenv import load_dotenv
14
+ from flask import Flask, request, jsonify
15
+ from werkzeug.security import generate_password_hash, check_password_hash
16
+ from flask_cors import CORS
17
+ from sentence_transformers import SentenceTransformer
18
+ import json
19
+
20
# ------------------------------------------------------------
# INITIAL SETUP
# ------------------------------------------------------------
app = Flask(__name__)

# Wide-open CORS for the demo frontend. NOTE(review): per the CORS spec,
# browsers reject Access-Control-Allow-Origin "*" when credentials are
# allowed — confirm whether supports_credentials is actually needed, and
# restrict "origins" before any production deployment.
CORS(app,
     resources={r"/*": {"origins": "*"}},
     supports_credentials=True,
     allow_headers=["Content-Type", "Authorization"],
     expose_headers=["Content-Type", "Authorization"],
     methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"])
31
+
32
@app.before_request
def handle_options_request():
    """Short-circuit CORS preflight (OPTIONS) requests with a 200
    before Flask tries to dispatch them to a route handler."""
    if request.method == "OPTIONS":
        return jsonify({"status": "CORS Preflight OK"}), 200
36
+
37
# ------------------------------------------------------------
# ENVIRONMENT VARIABLES
# ------------------------------------------------------------
# Resolve paths relative to this file so the app works from any CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
load_dotenv(os.path.join(BASE_DIR, ".env"))

# Best-effort: the app still starts without a key, with OpenAI-backed
# features disabled.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    print("⚠️ Warning: OPENAI_API_KEY not found. OpenAI features will be disabled.")
46
+
47
+ # ------------------------------------------------------------
48
+ # DATABASE CONNECTION (AUTO-DETECTION)
49
+ # ------------------------------------------------------------
50
print("🔍 Checking SQL Server connectivity...")

# Try both ODBC Driver 17 and 18
possible_drivers = ['{ODBC Driver 18 for SQL Server}', '{ODBC Driver 17 for SQL Server}']
driver = None
for d in possible_drivers:
    try:
        # pyodbc.drivers() lists installed driver names without braces.
        if d.strip('{}') in pyodbc.drivers():
            driver = d
            break
    except Exception:
        pass

if not driver:
    driver = '{ODBC Driver 17 for SQL Server}'
    print("⚠️ Defaulting to ODBC Driver 17 for SQL Server")

# Candidate SQL Server instances
# NOTE(review): these are host-machine addresses using Windows trusted
# auth; inside the Linux Docker image (see Dockerfile) neither localhost
# instances nor Trusted_Connection will work — confirm the intended
# deployment target.
test_servers = [
    r'localhost\SQLEXPRESS',
    r'localhost\MSSQLSERVER',
    r'localhost',
    r'127.0.0.1'
]

database = 'PyDetect'
76
+
77
def get_db_connection():
    """Open a connection to the PyDetect database.

    Probes each candidate server in the module-level ``test_servers``
    list with a short timeout and returns the first successful
    connection (Windows trusted authentication).

    Returns:
        pyodbc.Connection: an open database connection.

    Raises:
        ConnectionError: if no candidate instance accepts a connection.
    """
    for server in test_servers:
        try:
            conn = pyodbc.connect(
                f'DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection=yes;',
                timeout=3
            )
            print(f"✅ Connected to SQL Server instance: {server}")
            return conn
        # Catch pyodbc.Error (base class), not only OperationalError:
        # a missing/unknown ODBC driver raises InterfaceError, which
        # previously escaped the probe loop and crashed with an opaque
        # traceback instead of the intended ConnectionError below.
        except pyodbc.Error:
            continue
    raise ConnectionError("❌ Cannot connect to any SQL Server instance. Please ensure SQL Server is running.")
90
+
91
def create_user_table():
    """Create the Users table if it does not already exist.

    Best-effort at startup: any failure is logged rather than raised so
    the app can still boot without a reachable database (preserves the
    original behavior). The connection and cursor are now released on
    every path — the original leaked them when execute/commit raised.
    """
    conn = None
    cursor = None
    try:
        conn = get_db_connection()
        cursor = conn.cursor()
        cursor.execute('''
            IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='Users' AND xtype='U')
            CREATE TABLE Users (
                id INT IDENTITY(1,1) PRIMARY KEY,
                name NVARCHAR(120) NOT NULL,
                role NVARCHAR(50) NOT NULL,
                email NVARCHAR(120) UNIQUE NOT NULL,
                password NVARCHAR(255) NOT NULL
            )
        ''')
        conn.commit()
        print("✅ Users table verified/created successfully.")
    except Exception as e:
        print(f"❌ Database setup failed: {str(e)}")
    finally:
        # Always release DB resources, even when setup fails part-way.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
111
+
112
+ create_user_table()
113
+
114
# ------------------------------------------------------------
# LOAD VECTOR INDEX AND CHUNKS
# ------------------------------------------------------------
# Sentence embedding model used to encode queries for FAISS search.
MODEL = SentenceTransformer('all-MiniLM-L6-v2')
# Paths for the old and new files
FAISS_PATH = os.path.join(BASE_DIR, "crime_scene_index.faiss")
CHUNKS_PATH = os.path.join(BASE_DIR, "crime_scene_chunks.pkl")
NEW_FAISS_PATH = os.path.join(BASE_DIR, "Manual on Investigative Interviewing for Criminal Investigation.faiss")
NEW_CHUNKS_PATH = os.path.join(BASE_DIR, "Manual on Investigative Interviewing for Criminal Investigation.pkl")

# Load old FAISS index and text chunks
if os.path.exists(FAISS_PATH) and os.path.exists(CHUNKS_PATH):
    print("📘 Loading old FAISS index and text chunks...")
    old_index = faiss.read_index(FAISS_PATH)
    with open(CHUNKS_PATH, "rb") as f:
        # NOTE: pickle.load is only acceptable because these are
        # repo-bundled artifacts; never load untrusted pickles this way.
        old_text_chunks = pickle.load(f)
    print(f"✅ Loaded {len(old_text_chunks)} chunks from the old reference guide.")
else:
    # Missing files degrade gracefully: retrieval returns a fallback string.
    old_index = None
    old_text_chunks = []
    print("⚠️ Old FAISS or chunks file not found. Context retrieval disabled.")

# Load new FAISS index and text chunks
if os.path.exists(NEW_FAISS_PATH) and os.path.exists(NEW_CHUNKS_PATH):
    print("📘 Loading new FAISS index and text chunks...")
    new_index = faiss.read_index(NEW_FAISS_PATH)
    with open(NEW_CHUNKS_PATH, "rb") as f:
        new_text_chunks = pickle.load(f)
    print(f"✅ Loaded {len(new_text_chunks)} chunks from the new reference guide.")
else:
    new_index = None
    new_text_chunks = []
    print("⚠️ New FAISS or chunks file not found. Context retrieval for new book is disabled.")

# ------------------------------------------------------------
# BODY LANGUAGE BOOK FAISS INDEX (using provided FAISS file)
# ------------------------------------------------------------
BODY_BOOK_FAISS_PATH = os.path.join(BASE_DIR, "what-everybody-is-saying.faiss")
BODY_BOOK_CHUNKS_PATH = os.path.join(BASE_DIR, "what-everybody-is-saying_chunks.pkl")
# NOTE(review): this instantiates the same SentenceTransformer a second
# time, doubling model memory; MODEL_BODY = MODEL would likely suffice —
# confirm before changing.
MODEL_BODY = SentenceTransformer('all-MiniLM-L6-v2')

if os.path.exists(BODY_BOOK_FAISS_PATH) and os.path.exists(BODY_BOOK_CHUNKS_PATH):
    body_book_index = faiss.read_index(BODY_BOOK_FAISS_PATH)
    with open(BODY_BOOK_CHUNKS_PATH, "rb") as f:
        body_book_entries = pickle.load(f)
    print(f"✅ Loaded body language FAISS index and chunks: {len(body_book_entries)} entries.")
else:
    body_book_index = None
    body_book_entries = []
    print("⚠️ Body language FAISS or chunks file not found. Context retrieval disabled.")
164
+
165
# ------------------------------------------------------------
# HELPER FUNCTIONS
# ------------------------------------------------------------
# --- Computer Vision: lightweight face analysis (OpenCV Haar cascades) ---

# Global detector instances created once at import: MTCNN for face
# boxes/confidence, FER for per-face emotion probabilities. Both load
# model weights here, so first import of this module is slow.
detector_mtcnn = MTCNN()
fer_detector = FER()
172
+
173
def analyze_frame_mtcnn(image_bgr, previous=None):
    """Analyze a single BGR frame using MTCNN. Returns metrics dict.

    Args:
        image_bgr: OpenCV-style BGR image array, or None when decoding failed.
        previous: optional metrics dict from the prior frame; only its
            "face_box" entry is read, to compute inter-frame jitter.

    Returns:
        dict with keys: face_present, faces_count, jitter, face_box,
        quality, behavior_tags, investigative_expression.
    """
    # No frame at all: return the "no_face" shape of the metrics dict.
    if image_bgr is None:
        return {
            "face_present": False,
            "faces_count": 0,
            "jitter": None,
            "face_box": None,
            "quality": 0,
            "behavior_tags": ["no_face"],
            "investigative_expression": "no_face"
        }
    # MTCNN expects RGB input; OpenCV frames are BGR.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    faces = detector_mtcnn.detect_faces(image_rgb)
    if not faces:
        return {
            "face_present": False,
            "faces_count": 0,
            "jitter": None,
            "face_box": None,
            "quality": 0,
            "behavior_tags": ["no_face"],
            "investigative_expression": "absence"
        }
    # Choose largest face (by box area)
    face = max(faces, key=lambda f: f['box'][2] * f['box'][3])
    x, y, w, h = face['box']
    # Jitter (movement between frames) normalized by face width
    cx, cy = x + w / 2.0, y + h / 2.0
    jitter = None
    if previous and previous.get("face_box"):
        px, py, pw, ph = previous["face_box"]
        pcx, pcy = px + pw / 2.0, py + ph / 2.0
        dist = ((cx - pcx) ** 2 + (cy - pcy) ** 2) ** 0.5
        jitter = float(dist / max(1.0, w))
    # Heuristic quality: use face confidence (0..1 scaled to 0..100)
    quality = round(face.get('confidence', 0) * 100, 1)
    tags = []
    # Thresholds are heuristics: movement > 8% of face width per frame
    # is tagged as stress, > 4% as elevated.
    if jitter is not None:
        if jitter > 0.08:
            tags.append("stress_head_movement")
        elif jitter > 0.04:
            tags.append("elevated_movement")
    if not tags:
        tags.append("baseline")

    # Map tags to investigative_expression (similar to original logic)
    if "stress_head_movement" in tags:
        investigative_expression = "stress"
    elif "elevated_movement" in tags:
        investigative_expression = "elevated"
    elif "baseline" in tags:
        investigative_expression = "neutral"
    else:
        investigative_expression = tags[0] if tags else "unknown"

    return {
        "face_present": True,
        "faces_count": len(faces),
        "jitter": round(jitter, 4) if jitter is not None else None,
        "face_box": [int(x), int(y), int(w), int(h)],
        "quality": quality,
        "behavior_tags": tags,
        "investigative_expression": investigative_expression
    }
238
# Haar cascade classifiers used by analyze_frame() as a lightweight
# alternative to MTCNN. cv2.data.haarcascades ships with opencv-python.
# On failure the cascades are None and analyze_frame() degrades to a
# "no_face" result instead of crashing.
try:
    FACE_CASCADE = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    EYE_CASCADE = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")
    SMILE_CASCADE = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_smile.xml")
except Exception:
    FACE_CASCADE = None
    EYE_CASCADE = None
    SMILE_CASCADE = None
246
+
247
def _variance_of_laplacian(image_gray):
    """Sharpness metric: variance of the Laplacian of a grayscale image.

    Higher values indicate a sharper (less blurred) frame.
    """
    laplacian = cv2.Laplacian(image_gray, cv2.CV_64F)
    return float(laplacian.var())
249
+
250
def analyze_frame(image_bgr, previous=None):
    """Analyze a single BGR frame. Returns metrics dict.
    previous: optional dict from prior frame to compute jitter.

    Uses the module-level Haar cascades (face/eye/smile) plus FER for
    emotion. Returns a fixed-shape dict: face_present, faces_count,
    jitter, blur, brightness, eyes, smile, face_box, quality,
    expression, investigative_expression, behavior_tags, emotion.
    """
    # No frame, or the cascades failed to load: degrade to "no_face".
    if image_bgr is None or FACE_CASCADE is None:
        return {
            "face_present": False,
            "faces_count": 0,
            "jitter": None,
            "blur": None,
            "brightness": None,
            "eyes": 0,
            "smile": 0,
            "face_box": None,
            "quality": 0,
            "expression": "no_face",
            "investigative_expression": "no_face",
            "behavior_tags": ["no_face"],
            "emotion": None
        }

    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    faces = FACE_CASCADE.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5, minSize=(60, 60))

    # Basic quality metrics
    blur_val = _variance_of_laplacian(gray)
    # Normalize blur to 0-100 range heuristically (200 = "sharp enough")
    quality = max(0, min(100, (blur_val / 200.0) * 100))
    brightness = float(np.mean(gray))

    if len(faces) == 0:
        # Frame decoded but no face found: report quality metrics anyway.
        return {
            "face_present": False,
            "faces_count": 0,
            "jitter": None,
            "blur": round(blur_val, 2),
            "brightness": round(brightness, 2),
            "eyes": 0,
            "smile": 0,
            "face_box": None,
            "quality": round(quality, 1),
            "expression": "no_face",
            "investigative_expression": "absence",
            "behavior_tags": ["avoidance", "absence"],
            "emotion": None
        }

    # Choose largest face
    x, y, w, h = max(faces, key=lambda b: b[2] * b[3])
    face_roi_gray = gray[y:y+h, x:x+w]

    # Secondary cascades run only inside the face ROI; [] when unavailable.
    eyes = EYE_CASCADE.detectMultiScale(face_roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(20, 20)) if EYE_CASCADE is not None else []
    smiles = SMILE_CASCADE.detectMultiScale(face_roi_gray, scaleFactor=1.3, minNeighbors=20) if SMILE_CASCADE is not None else []

    # Jitter (movement between frames) normalized by face width
    cx, cy = x + w / 2.0, y + h / 2.0
    jitter = None
    if previous and previous.get("face_box"):
        px, py, pw, ph = previous["face_box"]
        pcx, pcy = px + pw / 2.0, py + ph / 2.0
        dist = ((cx - pcx) ** 2 + (cy - pcy) ** 2) ** 0.5
        jitter = float(dist / max(1.0, w))  # normalized

    # FER emotion detection (best-effort; None when FER finds no face)
    emotion = None
    try:
        fer_results = fer_detector.detect_emotions(image_bgr)
        if fer_results:
            emotions = fer_results[0]["emotions"]
            # Label with the highest probability wins.
            emotion = max(emotions, key=emotions.get)
    except Exception:
        emotion = None

    # Investigative-oriented heuristic classification
    eyes_cnt = int(len(eyes) if eyes is not None else 0)
    smile_cnt = int(len(smiles) if smiles is not None else 0)
    expr_basic = "neutral"
    if smile_cnt >= 1 and quality >= 40:
        expr_basic = "smiling"
    elif smile_cnt == 0 and eyes_cnt >= 1:
        expr_basic = "flat"

    tags = []
    # Head movement / jitter cues (thresholds are heuristic fractions of
    # face width moved per frame)
    if jitter is not None:
        if jitter > 0.08:
            tags.append("stress_head_movement")
        elif jitter > 0.04:
            tags.append("elevated_movement")
    # Avoidance (low eyes or poor quality + no smile)
    if eyes_cnt == 0 and smile_cnt == 0:
        tags.append("possible_avoidance")
    # Masking smile (smile plus low eyes or movement)
    if smile_cnt >= 1 and (eyes_cnt <= 1 or (jitter is not None and jitter > 0.04)):
        tags.append("masking_smile")
    # Calm/composed
    if jitter is not None and jitter <= 0.02 and smile_cnt == 0 and eyes_cnt >= 2:
        tags.append("composed")
    # Potential concealment (smile with minimal eye engagement)
    if smile_cnt >= 1 and eyes_cnt == 0:
        tags.append("potential_concealment")
    if not tags:
        tags.append("baseline")

    # Derive a single investigative_expression label preference order
    investigative_expression = (
        "masking_smile" if "masking_smile" in tags else
        "stress" if "stress_head_movement" in tags else
        "avoidance" if "possible_avoidance" in tags else
        "concealment" if "potential_concealment" in tags else
        "composed" if "composed" in tags else
        expr_basic
    )

    return {
        "face_present": True,
        "faces_count": int(len(faces)),
        "jitter": round(jitter, 4) if jitter is not None else None,
        "blur": round(blur_val, 2),
        "brightness": round(brightness, 2),
        "eyes": eyes_cnt,
        "smile": smile_cnt,
        "face_box": [int(x), int(y), int(w), int(h)],
        "quality": round(quality, 1),
        "expression": expr_basic,  # keep backward-compatible key
        "investigative_expression": investigative_expression,
        "behavior_tags": tags,
        "emotion": emotion
    }
379
+
380
def recommend_command(metrics):
    """Derive a simple guidance command based on metrics.

    Checks the most important problem first (no face, poor quality,
    head movement, hidden eyes) and returns the matching instruction;
    otherwise tells the subject to proceed.
    """
    if not metrics or not metrics.get("face_present"):
        return "Please position your face in the frame and face the camera."
    quality = metrics.get("quality", 0)
    if quality < 40:
        return "Increase lighting and hold steady for a clearer view."
    jitter = metrics.get("jitter")
    if jitter is not None and jitter > 0.08:
        return "Try to keep your head steady while answering."
    visible_eyes = metrics.get("eyes", 0)
    if visible_eyes == 0:
        return "Ensure your eyes are visible; avoid looking away."
    return "Proceed with your answer."
391
+
392
+ def _normalize_epoch_to_seconds(value):
393
+ """Normalize a numeric epoch timestamp to seconds.
394
+ Accepts seconds (e.g., 1730971974) or milliseconds (e.g., 1730971974123).
395
+ Returns float seconds or None if invalid.
396
+ """
397
+ try:
398
+ if value is None:
399
+ return None
400
+ v = float(value)
401
+ # Heuristic: treat large values as ms
402
+ if v > 1e11:
403
+ return v / 1000.0
404
+ return v
405
+ except Exception:
406
+ return None
407
+
408
def _aggregate_interval_from_history(face_state, start_s, end_s):
    """Aggregate metrics from face_state['history'] within [start_s, end_s].
    Returns dict with per-answer stats.

    Args:
        face_state: dict holding a "history" list of per-frame metric
            samples; each sample carries a "t" timestamp (seconds).
        start_s, end_s: inclusive interval bounds in epoch seconds.

    Returns:
        dict of aggregates; averages are None when no sample carried
        that metric. The two early returns keep a fixed minimal shape
        for empty/invalid intervals.
    """
    history = face_state.get("history", [])
    # Invalid interval or no history at all: zeroed-out aggregate.
    if not history or start_s is None or end_s is None or end_s < start_s:
        return {
            "duration": 0.0,
            "frames": 0,
            "face_presence_rate": 0.0,
            "avg_quality": None,
            "avg_brightness": None,
            "avg_jitter": None,
            "avg_eyes": None,
            "smile_rate": 0.0
        }
    samples = [h for h in history if start_s <= h.get("t", 0) <= end_s]
    # Valid interval but no frames captured inside it.
    if not samples:
        return {
            "duration": round(float(max(0.0, end_s - start_s)), 3),
            "frames": 0,
            "face_presence_rate": 0.0,
            "avg_quality": None,
            "avg_brightness": None,
            "avg_jitter": None,
            "avg_eyes": None,
            "smile_rate": 0.0
        }
    # Mean rounded to 3 decimals; None for an empty list.
    def _avg(arr):
        return round(float(sum(arr) / len(arr)), 3) if arr else None
    frames = len(samples)
    presence = [1.0 if s.get("face_present") else 0.0 for s in samples]
    # Per-metric lists skip samples where the metric is missing/None.
    qualities = [s.get("quality") for s in samples if s.get("quality") is not None]
    brights = [s.get("brightness") for s in samples if s.get("brightness") is not None]
    jitters = [s.get("jitter") for s in samples if s.get("jitter") is not None]
    eyes = [s.get("eyes") for s in samples if s.get("eyes") is not None]
    smiles = [s.get("smile") for s in samples if s.get("smile") is not None]
    # Expression distribution (label -> frame count)
    expr_hist = {}
    inv_expr_hist = {}
    tag_hist = {}
    for s in samples:
        ex = s.get("investigative_expression") or s.get("expression") or "unknown"
        expr_hist[ex] = expr_hist.get(ex, 0) + 1
        inv = s.get("investigative_expression") or "unknown"
        inv_expr_hist[inv] = inv_expr_hist.get(inv, 0) + 1
        tags = s.get("behavior_tags") or []
        for t in tags:
            tag_hist[t] = tag_hist.get(t, 0) + 1
    # Most frequent label wins; ties break on first-seen order via max().
    dominant_expr = None
    if expr_hist:
        dominant_expr = max(expr_hist.items(), key=lambda kv: kv[1])[0]
    dominant_investigative = None
    if inv_expr_hist:
        dominant_investigative = max(inv_expr_hist.items(), key=lambda kv: kv[1])[0]

    return {
        "duration": round(float(max(0.0, end_s - start_s)), 3),
        "frames": frames,
        "face_presence_rate": round(_avg(presence) if presence else 0.0, 3),
        "avg_quality": _avg(qualities),
        "avg_brightness": _avg(brights),
        "avg_jitter": _avg(jitters),
        "avg_eyes": _avg(eyes),
        # Fraction of frames with at least one smile detection.
        "smile_rate": round(float(sum(1 for v in smiles if v and v > 0) / frames), 3) if frames else 0.0,
        "expression_distribution": expr_hist,
        "investigative_expression_distribution": inv_expr_hist,
        "behavior_tag_distribution": tag_hist,
        "dominant_expression": dominant_expr,
        "dominant_investigative_expression": dominant_investigative
    }
479
+
480
+ def _clamp(v, lo=0.0, hi=100.0):
481
+ return float(max(lo, min(hi, v)))
482
+
483
+ def _safe_ratio(a, b):
484
+ try:
485
+ if b == 0:
486
+ return 0.0
487
+ return float(a) / float(b)
488
+ except Exception:
489
+ return 0.0
490
+
491
def compute_investigative_assessment(final_result, face_body):
    """Produce investigation-oriented cues and an involvement score (0-100).
    Inputs:
    - final_result: dict with keys like {"truth_score": number, "label": str}
    - face_body: {"metrics": { ... aggregation ... }} or None
    Output schema:
    {
      "nonverbal_score": number,   # 0..100 (higher means more concerning)
      "involvement_score": number, # 0..100 (higher means likely involved)
      "cues": [str],               # textual cues detected
      "rationale": str             # brief explanation
    }

    All weights and thresholds below are hand-tuned heuristics, not
    validated measures.
    """
    cues = []
    nonverbal = 0.0
    metrics = (face_body or {}).get("metrics") if face_body else None

    # Base involvement from AI label (maps investigative label to base risk)
    label = (final_result or {}).get("label", "").upper()
    base_involvement = 50.0
    if "GUILTY" in label:
        base_involvement = 80.0
    elif "INNOCENT" in label:
        base_involvement = 20.0
    elif "EVASIVE" in label:
        base_involvement = 60.0

    if metrics:
        fpr = float(metrics.get("face_presence_rate", 0.0) or 0.0)
        jitter = metrics.get("avg_jitter")
        smile_rate = float(metrics.get("smile_rate") or 0.0)
        avg_eyes = metrics.get("avg_eyes")
        expr_hist = metrics.get("investigative_expression_distribution", {}) or {}
        dominant_expr = (metrics.get("dominant_investigative_expression") or "").lower()

        # Avoidance cue: low presence in window
        if fpr < 0.5:
            cues.append("face_avoidance")
            nonverbal += 25.0 * (0.5 - fpr) / 0.5  # up to +25

        # Movement (jitter) cue: normalized
        if jitter is not None:
            # Typical steady jitter ~0.0-0.04; higher suggests agitation
            if jitter > 0.08:
                cues.append("high_head_movement")
                nonverbal += _clamp(((float(jitter) - 0.03) / 0.12) * 40.0, 0.0, 40.0)

        # Expression cues: each ratio is that label's share of frames.
        total_frames = sum(expr_hist.values()) or 0
        stress_ratio = _safe_ratio(expr_hist.get("stress", 0), total_frames)
        avoidance_ratio = _safe_ratio(expr_hist.get("avoidance", 0), total_frames)
        conceal_ratio = _safe_ratio(expr_hist.get("concealment", 0), total_frames)
        masking_ratio = _safe_ratio(expr_hist.get("masking_smile", 0), total_frames)
        composed_ratio = _safe_ratio(expr_hist.get("composed", 0), total_frames)

        if stress_ratio >= 0.15:
            cues.append("stress_cue")
            nonverbal += 12.0 * (stress_ratio / 0.5)
        if avoidance_ratio >= 0.15:
            cues.append("avoidance_cue")
            nonverbal += 14.0 * (avoidance_ratio / 0.5)
        if conceal_ratio >= 0.10:
            cues.append("concealment_cue")
            nonverbal += 10.0 * (conceal_ratio / 0.4)
        if masking_ratio >= 0.10:
            cues.append("masking_smile_cue")
            nonverbal += 8.0 * (masking_ratio / 0.4)

        # Incongruent affect: many smiles but low eyes -> suspicion
        if smile_rate > 0.35 and (avg_eyes is not None and avg_eyes < 1.0):
            cues.append("incongruent_affect")
            nonverbal += 10.0

        # Calming / mitigating cues reduce score
        if composed_ratio >= 0.40 and jitter is not None and jitter <= 0.03:
            nonverbal -= 12.0
        if dominant_expr == "composed" and fpr >= 0.85:
            nonverbal -= 6.0

    nonverbal = _clamp(nonverbal, 0.0, 100.0)

    # Combine with AI judgement into involvement score (70/30 blend)
    involvement = _clamp(0.7 * base_involvement + 0.3 * nonverbal, 0.0, 100.0)

    rationale = (
        f"Base={int(base_involvement)} from label '{label}', "
        f"Nonverbal={int(nonverbal)} via cues: {', '.join(cues) if cues else 'none'}"
    )

    return {
        "nonverbal_score": round(nonverbal, 1),
        "involvement_score": round(involvement, 1),
        "cues": cues,
        "rationale": rationale
    }
586
+
587
def retrieve_relevant_context(query, top_k=3, use_new_reference=False):
    """Retrieve relevant book context from FAISS.

    Args:
        query: free-text query to embed and search with.
        top_k: number of nearest chunks to retrieve.
        use_new_reference: search the interviewing-manual index instead
            of the default crime-scene index.

    Returns:
        str: matching chunks joined by newlines, or a fallback message
        when the requested index is not loaded.
    """
    if use_new_reference:
        index = new_index
        text_chunks = new_text_chunks
    else:
        index = old_index  # Default to old reference if new is not selected
        text_chunks = old_text_chunks

    if index is None or len(text_chunks) == 0:
        return "No reference context found (FAISS not loaded)."

    query_vector = MODEL.encode([query]).astype('float32')
    # I holds chunk indices; D (distances) is unused here.
    D, I = index.search(query_vector, k=top_k)
    # Guard against indices past the chunk list (index/chunks drift).
    valid_indices = [i for i in I[0] if i < len(text_chunks)]
    results = [text_chunks[i] for i in valid_indices]
    return "\n".join(results)
604
+
605
def detect_crime_type(brief_description: str):
    """Automatically detect crime type from description.

    Scans the lowercased description for known substrings and returns
    the first matching crime category (dictionary insertion order
    decides ties), or "Unknown" when nothing matches or the input is
    empty/blank.
    """
    if not brief_description or not brief_description.strip():
        return "Unknown"
    text = brief_description.lower()
    crime_keywords = {
        "kidnap": "Kidnapping",
        "abduct": "Kidnapping",
        "murder": "Murder",
        "kill": "Murder",
        "stab": "Murder",
        "shoot": "Murder",
        "theft": "Theft",
        "steal": "Theft",
        "rob": "Robbery",
        "burglar": "Burglary",
        "attack": "Assault",
        "assault": "Assault",
        "fraud": "Fraud",
        "scam": "Fraud",
        "arson": "Arson",
        "fire": "Arson",
        "rape": "Sexual Assault",
        "harass": "Harassment",
        "poison": "Attempted Murder"
    }
    return next(
        (crime for keyword, crime in crime_keywords.items() if keyword in text),
        "Unknown",
    )
635
+
636
+
637
+ # ------------------------------------------------------------
638
+ # AUTH ROUTES
639
+ # ------------------------------------------------------------
640
@app.route('/sign-in', methods=['POST'])
def sign_in():
    """Authenticate a user by email and password.

    Returns 200 with the user profile on success, 401 on a wrong
    password, 404 when the email is unknown, 400 on a missing/non-JSON
    body.
    """
    # get_json(silent=True) avoids Flask raising a 415/400 with an
    # unhelpful default body on non-JSON input (matches /start_session).
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"message": "Email and password are required"}), 400
    email = data.get('email')
    password = data.get('password')
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM Users WHERE email = ?', (email,))
        user = cursor.fetchone()
        cursor.close()
    finally:
        # Guarantee the connection is released even if the query fails.
        conn.close()
    # Row layout: (id, name, role, email, password_hash).
    if user and check_password_hash(user[4], password):
        return jsonify({"message": "Login successful", "user": {
            "id": user[0], "name": user[1], "role": user[2], "email": user[3]
        }}), 200
    elif user:
        return jsonify({"message": "Invalid password"}), 401
    else:
        return jsonify({"message": "Email not found"}), 404
659
+
660
@app.route('/sign-up', methods=['POST'])
def sign_up():
    """Register a new user.

    Fix: the original leaked the DB connection/cursor when the email was
    already registered (early return without close); cleanup now runs on
    every path via try/finally. Also tolerates a missing/non-JSON body.
    """
    data = request.get_json(silent=True) or {}
    name, role, email, password = data.get('name'), data.get('role'), data.get('email'), data.get('password')
    if not email or not password:
        return jsonify({"message": "Email and password are required"}), 400
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('SELECT * FROM Users WHERE email = ?', (email,))
            if cursor.fetchone():
                return jsonify({"message": "Email already exists"}), 400
            # Store only a salted hash, never the plaintext password.
            hashed_password = generate_password_hash(password)
            cursor.execute('INSERT INTO Users (name, role, email, password) VALUES (?, ?, ?, ?)',
                           (name, role, email, hashed_password))
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return jsonify({"message": "User created successfully"}), 201
678
+
679
+ # ------------------------------------------------------------
680
+ # CORE LOGIC
681
+ # ------------------------------------------------------------
682
# In-memory interrogation sessions keyed by session_id (UUID string).
# Each value holds: "profile" dict, detected "crime_type", the list of
# scored "responses", and "last_question".
# NOTE(review): not persisted and not thread-safe; lost on restart.
sessions = {}
683
+
684
+ @app.route("/health", methods=["GET"])
685
+ def health_check():
686
+ return jsonify({
687
+ "status": "healthy",
688
+ "service": "PyDetect backend",
689
+ "features": {
690
+ "crime_type_detection": "enabled",
691
+ "question_generation": "enabled",
692
+ "response_analysis": "enabled",
693
+ "report_generation": "enabled",
694
+ "validation_results": "enabled"
695
+ },
696
+ "endpoints": [
697
+ "/sign-in", "/sign-up", "/start_session",
698
+ "/submit_profile", "/submit_case", "/submit_response",
699
+ "/get_report", "/get_validation_results", "/ask_question"
700
+ ]
701
+ }), 200
702
+
703
+ @app.route("/start_session", methods=["POST"])
704
+ def start_session():
705
+ data = request.get_json(silent=True)
706
+ brief_description = ""
707
+ if data:
708
+ brief_description = data.get("briefDescription", "")
709
+ print(f"[SESSION START] brief_description: {brief_description}")
710
+ sid = str(uuid.uuid4())
711
+ sessions[sid] = {"profile": {"brief_description": brief_description} if brief_description else {}, "crime_type": "", "responses": [], "last_question": ""}
712
+ return jsonify({"session_id": sid})
713
+
714
+ @app.route("/submit_profile", methods=["POST"])
715
+ def submit_profile():
716
+ data = request.get_json(force=True)
717
+ sid = data.get("session_id")
718
+ if sid not in sessions:
719
+ return jsonify({"error": "Invalid session_id"}), 400
720
+ profile = data.get("profile", {})
721
+ brief = profile.get("brief_description", "")
722
+ detected = detect_crime_type(brief)
723
+ profile["crime_type"] = detected
724
+ sessions[sid]["profile"] = profile
725
+ sessions[sid]["crime_type"] = detected
726
+ return jsonify({"status": "Profile saved", "detected_crime_type": detected})
727
+
728
+ @app.route("/submit_case", methods=["POST"])
729
+ def submit_case_alias():
730
+ """For older frontend compatibility."""
731
+ data = request.get_json(force=True)
732
+ sid = data.get("session_id")
733
+ if sid not in sessions:
734
+ return jsonify({"error": "Invalid session_id"}), 400
735
+ profile = data.get("case_data") or data.get("profile") or {}
736
+ brief = profile.get("brief_description", "")
737
+ detected = detect_crime_type(brief)
738
+ profile["crime_type"] = detected
739
+ sessions[sid]["profile"] = profile
740
+ sessions[sid]["crime_type"] = detected
741
+ return jsonify({"status": "Case saved", "detected_crime_type": detected}), 200
742
+
743
+ # ------------------------------------------------------------
744
+ # AI RESPONSE + REPORT + QUESTIONS
745
+ # ------------------------------------------------------------
746
+ @app.route("/submit_response", methods=["POST"])
747
+ def submit_response():
748
+ try:
749
+ data = request.get_json(force=True)
750
+ sid, text = data.get("session_id"), data.get("text", "")
751
+ if sid not in sessions:
752
+ return jsonify({"error": "Invalid session_id"}), 400
753
+ session = sessions[sid]
754
+ profile = session.get("profile", {})
755
+ case_desc = profile.get("brief_description", "No description.")
756
+ last_q = session.get("last_question", "")
757
+ responses = session.get("responses", [])
758
+ history = "\n".join([f"Q: {r['question']}\nA: {r['text']}" for r in responses[-3:]]) if responses else ""
759
+ # Optional frontend timing fields
760
+ answer_start_at_ms = data.get("answer_start_at")
761
+ answer_end_at_ms = data.get("answer_end_at")
762
+ duration_ms = data.get("duration_ms")
763
+ mode = data.get("mode") # 'voice' | 'text' | 'mixed'
764
+ question_id = data.get("question_id")
765
+ response_id = data.get("response_id")
766
+
767
+ # Retrieve relevant context from reference material (if available)
768
+ context = retrieve_relevant_context(case_desc, top_k=3, use_new_reference=True)
769
+
770
+ # Calculate semantic similarity between answer and context
771
+ if context and context != "No reference context found (FAISS not loaded).":
772
+ answer_vec = MODEL.encode([text])[0]
773
+ context_vec = MODEL.encode([context])[0]
774
+ similarity = float(np.dot(answer_vec, context_vec) / (np.linalg.norm(answer_vec) * np.linalg.norm(context_vec)))
775
+ similarity_score = round(similarity * 100, 2)
776
+ else:
777
+ similarity_score = None
778
+
779
+ # Enhanced prompt for OpenAI
780
+ prompt = f"""
781
+ You are a detective analyzing a suspect's reply.
782
+
783
+ CASE: {case_desc}
784
+ QUESTION: {last_q}
785
+ ANSWER: {text}
786
+ HISTORY:
787
+ {history if history else 'None'}
788
+
789
+ REFERENCE CONTEXT (from official guide):
790
+ {context if context else 'None'}
791
+
792
+ First, compare the answer to the reference context and rate its factual alignment (0-100).
793
+ Second, classify the reply as CONFESSION, DENIAL, or EVASIVE.
794
+ Third, give detailed reasoning, confidence (0–100), and final label: LIKELY GUILTY, LIKELY INNOCENT, or EVASIVE.
795
+ Return JSON like:
796
+ {{"truth_score": number, "label": "string", "reason": "string", "similarity_score": number}}
797
+ """
798
+ response = requests.post(
799
+ "https://api.openai.com/v1/chat/completions",
800
+ headers={"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"},
801
+ json={"model": "gpt-3.5-turbo", "messages": [
802
+ {"role": "system", "content": "You are a professional detective."},
803
+ {"role": "user", "content": prompt}
804
+ ], "temperature": 0.3, "max_tokens": 250}, timeout=60)
805
+ import json
806
+ result = json.loads(response.json()["choices"][0]["message"]["content"])
807
+ # Optionally, blend the AI's truth_score with the semantic similarity
808
+ if similarity_score is not None and "truth_score" in result:
809
+ # Weighted average: 70% AI, 30% similarity
810
+ result["truth_score"] = round(0.7 * result["truth_score"] + 0.3 * similarity_score, 2)
811
+ result["similarity_score"] = similarity_score
812
+ record = {"question": last_q, "text": text, "final": result}
813
+ # Persist optional timing/meta for traceability
814
+ trace = {}
815
+ if question_id: trace["question_id"] = question_id
816
+ if response_id: trace["response_id"] = response_id
817
+ if mode: trace["mode"] = mode
818
+ if answer_start_at_ms is not None: trace["answer_start_at"] = answer_start_at_ms
819
+ if answer_end_at_ms is not None: trace["answer_end_at"] = answer_end_at_ms
820
+ if duration_ms is not None: trace["duration_ms"] = duration_ms
821
+ if trace:
822
+ record["timing"] = trace
823
+
824
+ # If timing provided, aggregate face/body metrics over that interval from history
825
+ try:
826
+ face_state = session.setdefault("face", {})
827
+ start_s = _normalize_epoch_to_seconds(answer_start_at_ms)
828
+ end_s = _normalize_epoch_to_seconds(answer_end_at_ms)
829
+ if start_s is None and end_s is None and duration_ms is not None:
830
+ # If only duration provided, use server 'now' as end
831
+ end_s = time.time()
832
+ start_s = end_s - max(0.0, float(duration_ms) / 1000.0)
833
+ elif start_s is not None and end_s is None and duration_ms is not None:
834
+ end_s = start_s + max(0.0, float(duration_ms) / 1000.0)
835
+ elif end_s is not None and start_s is None and duration_ms is not None:
836
+ start_s = end_s - max(0.0, float(duration_ms) / 1000.0)
837
+
838
+ if start_s is not None and end_s is not None:
839
+ agg = _aggregate_interval_from_history(face_state, start_s, end_s)
840
+ record["face_body"] = {
841
+ "start": start_s,
842
+ "end": end_s,
843
+ "metrics": agg
844
+ }
845
+ except Exception:
846
+ # Do not fail the request because of metrics aggregation
847
+ pass
848
+
849
+ session["responses"].append(record)
850
+ # After appending, compute investigative assessment (uses face_body if present)
851
+ try:
852
+ assessment = compute_investigative_assessment(record.get("final"), record.get("face_body"))
853
+ record["investigative_assessment"] = assessment
854
+ except Exception:
855
+ pass
856
+ # If there's a recent, unattached answer segment, attach it to this response
857
+ try:
858
+ segs = session.get("answer_segments") or []
859
+ if segs:
860
+ last_seg = segs[-1]
861
+ if not last_seg.get("attached"):
862
+ session["responses"][-1]["face_body"] = last_seg
863
+ last_seg["attached"] = True
864
+ except Exception:
865
+ pass
866
+ session["last_answer"] = text
867
+ return jsonify(result)
868
+ except Exception as e:
869
+ return jsonify({"error": str(e)}), 500
870
+
871
+ @app.route("/get_report/<session_id>", methods=["GET"])
872
+ def get_report(session_id):
873
+ if session_id not in sessions:
874
+ return jsonify({"error": "Invalid session_id"}), 400
875
+
876
+ session = sessions[session_id]
877
+ profile = session.get("profile", {})
878
+ crime = profile.get("brief_description", "unspecified")
879
+ responses = session.get("responses", [])
880
+
881
+ if not responses:
882
+ return jsonify({
883
+ "report": "No responses yet.",
884
+ "verdict": "Inconclusive",
885
+ "truePercentage": 0,
886
+ "falsePercentage": 0,
887
+ "truthScore": 0,
888
+ "avg_truth_score": 0,
889
+ "validationResult": "Inconclusive",
890
+ "session_duration": "0 minutes",
891
+ "questions_answered": 0
892
+ }), 200
893
+
894
+ # Calculate truth scores and statistics
895
+ truth_scores = [r["final"]["truth_score"] for r in responses]
896
+ avg_truth_score = sum(truth_scores) / len(truth_scores)
897
+
898
+ # Calculate percentages for frontend validation page
899
+ true_percentage = max(0, min(100, avg_truth_score))
900
+ false_percentage = 100 - true_percentage
901
+
902
+ # Determine validation result
903
+ if avg_truth_score >= 70:
904
+ validation_result = "LIKELY TRUTHFUL"
905
+ elif avg_truth_score >= 50:
906
+ validation_result = "INCONCLUSIVE"
907
+ else:
908
+ validation_result = "LIKELY DECEPTIVE"
909
+
910
+ # Build interview transcript for AI analysis
911
+ interview = "\n".join([
912
+ f"Detective: {r['question']}\nAccused: {r['text']}\nAssessment: {r['final']['label']} ({r['final']['truth_score']}%)"
913
+ for r in responses])
914
+
915
+ # Generate AI verdict if OpenAI is available
916
+ final_verdict = "Inconclusive"
917
+ summary = "Analysis based on response patterns and truth indicators."
918
+
919
+ if OPENAI_API_KEY:
920
+ try:
921
+ prompt = f"""
922
+ Summarize this interrogation and decide verdict.
923
+
924
+ CASE: {crime}
925
+ INTERVIEW:
926
+ {interview}
927
+
928
+ Respond JSON:
929
+ {{"final_verdict": "string", "summary": "string"}}
930
+ """
931
+ response = requests.post(
932
+ "https://api.openai.com/v1/chat/completions",
933
+ headers={"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"},
934
+ json={"model": "gpt-3.5-turbo", "messages": [
935
+ {"role": "system", "content": "You are a detective summarizing interrogation."},
936
+ {"role": "user", "content": prompt}
937
+ ], "temperature": 0.4, "max_tokens": 300}, timeout=60)
938
+
939
+ import json
940
+ ai_result = json.loads(response.json()["choices"][0]["message"]["content"])
941
+ final_verdict = ai_result.get("final_verdict", final_verdict)
942
+ summary = ai_result.get("summary", summary)
943
+ except Exception as e:
944
+ print(f"AI analysis failed: {e}")
945
+
946
+ # Aggregate involvement from investigative assessments if present
947
+ involvement_scores = []
948
+ cue_counter = {}
949
+ for r in responses:
950
+ ia = r.get("investigative_assessment")
951
+ if ia and isinstance(ia.get("involvement_score"), (int, float)):
952
+ involvement_scores.append(float(ia["involvement_score"]))
953
+ if ia and isinstance(ia.get("cues"), list):
954
+ for c in ia["cues"]:
955
+ cue_counter[c] = cue_counter.get(c, 0) + 1
956
+ avg_involvement = round(float(sum(involvement_scores) / len(involvement_scores)), 1) if involvement_scores else 0.0
957
+ high_risk = sum(1 for s in involvement_scores if s >= 70)
958
+ moderate = sum(1 for s in involvement_scores if 40 <= s < 70)
959
+ low_risk = sum(1 for s in involvement_scores if s < 40)
960
+
961
+ # Return comprehensive data for frontend validation page
962
+ return jsonify({
963
+ "final_verdict": final_verdict,
964
+ "summary": summary,
965
+ "truePercentage": round(true_percentage, 1),
966
+ "falsePercentage": round(false_percentage, 1),
967
+ "truthScore": round(avg_truth_score, 1),
968
+ "avg_truth_score": round(avg_truth_score, 1),
969
+ "validationResult": validation_result,
970
+ "session_duration": f"{len(responses) * 2} minutes", # Estimate 2 minutes per Q&A
971
+ "questions_answered": len(responses),
972
+ "responses": responses,
973
+ "case_summary": crime,
974
+ "overall_involvement": {
975
+ "avg_involvement_score": avg_involvement,
976
+ "high_risk_count": high_risk,
977
+ "moderate_count": moderate,
978
+ "low_risk_count": low_risk,
979
+ "top_cues": sorted(cue_counter.items(), key=lambda kv: kv[1], reverse=True)[:5]
980
+ },
981
+ "detailed_analysis": {
982
+ "truth_indicators": sum(1 for r in responses if r["final"]["truth_score"] > 70),
983
+ "deception_indicators": sum(1 for r in responses if r["final"]["truth_score"] < 50),
984
+ "neutral_responses": sum(1 for r in responses if 50 <= r["final"]["truth_score"] <= 70),
985
+ "highest_truth_score": max(truth_scores) if truth_scores else 0,
986
+ "lowest_truth_score": min(truth_scores) if truth_scores else 0,
987
+ "consistency_rating": "High" if max(truth_scores) - min(truth_scores) < 30 else "Moderate" if max(truth_scores) - min(truth_scores) < 50 else "Low"
988
+ }
989
+ })
990
+
991
+ @app.route("/get_validation_results/<session_id>", methods=["GET"])
992
+ def get_validation_results(session_id):
993
+ """
994
+ Endpoint specifically designed for the validation page component
995
+ Returns data in the exact format expected by the frontend
996
+ """
997
+ if session_id not in sessions:
998
+ return jsonify({"error": "Invalid session_id"}), 400
999
+
1000
+ session = sessions[session_id]
1001
+ profile = session.get("profile", {})
1002
+ responses = session.get("responses", [])
1003
+
1004
+ if not responses:
1005
+ return jsonify({
1006
+ "truePercentage": 0,
1007
+ "falsePercentage": 0,
1008
+ "truthScore": 0,
1009
+ "avg_truth_score": 0,
1010
+ "validationResult": "Inconclusive - No responses recorded",
1011
+ "session_duration": "0 minutes",
1012
+ "questions_answered": 0,
1013
+ "report": {
1014
+ "case_summary": profile.get("brief_description", "No case details"),
1015
+ "total_questions": 0,
1016
+ "analysis_complete": False
1017
+ }
1018
+ }), 200
1019
+
1020
+ # Calculate comprehensive validation metrics
1021
+ truth_scores = [r["final"]["truth_score"] for r in responses]
1022
+ avg_truth_score = sum(truth_scores) / len(truth_scores)
1023
+
1024
+ # Frontend-compatible percentages
1025
+ true_percentage = max(0, min(100, avg_truth_score))
1026
+ false_percentage = 100 - true_percentage
1027
+
1028
+ # Determine validation result with detailed reasoning
1029
+ if avg_truth_score >= 85:
1030
+ validation_result = "HIGHLY TRUTHFUL"
1031
+ elif avg_truth_score >= 70:
1032
+ validation_result = "LIKELY TRUTHFUL"
1033
+ elif avg_truth_score >= 50:
1034
+ validation_result = "INCONCLUSIVE"
1035
+ elif avg_truth_score >= 30:
1036
+ validation_result = "LIKELY DECEPTIVE"
1037
+ else:
1038
+ validation_result = "HIGHLY DECEPTIVE"
1039
+
1040
+ # Calculate session duration (estimate)
1041
+ estimated_duration = len(responses) * 2 # 2 minutes per question
1042
+ session_duration = f"{estimated_duration} minutes"
1043
+
1044
+ # Build comprehensive report object
1045
+ report_data = {
1046
+ "case_summary": profile.get("brief_description", "No case details"),
1047
+ "crime_type": session.get("crime_type", "Unknown"),
1048
+ "total_questions": len(responses),
1049
+ "analysis_complete": True,
1050
+ "truth_indicators": sum(1 for r in responses if r["final"]["truth_score"] > 70),
1051
+ "deception_indicators": sum(1 for r in responses if r["final"]["truth_score"] < 50),
1052
+ "neutral_responses": sum(1 for r in responses if 50 <= r["final"]["truth_score"] <= 70),
1053
+ "session_start": "Current session",
1054
+ "avg_truth_score": avg_truth_score,
1055
+ "validationResult": validation_result,
1056
+ "session_duration": session_duration,
1057
+ "questions_answered": len(responses),
1058
+ "detailed_responses": [
1059
+ {
1060
+ "question": r.get("question", ""),
1061
+ "answer": r.get("text", ""),
1062
+ "truth_score": r["final"]["truth_score"],
1063
+ "label": r["final"]["label"],
1064
+ "reason": r["final"].get("reason", "")
1065
+ } for r in responses
1066
+ ]
1067
+ }
1068
+
1069
+ return jsonify({
1070
+ "truePercentage": round(true_percentage, 1),
1071
+ "falsePercentage": round(false_percentage, 1),
1072
+ "truthScore": round(avg_truth_score, 1),
1073
+ "avg_truth_score": round(avg_truth_score, 1),
1074
+ "validationResult": validation_result,
1075
+ "session_duration": session_duration,
1076
+ "questions_answered": len(responses),
1077
+ "report": report_data,
1078
+ "success": True
1079
+ })
1080
+
1081
+ @app.route("/ask_question", methods=["GET"])
1082
+ def ask_question():
1083
+ try:
1084
+ if not OPENAI_API_KEY:
1085
+ return jsonify({"error": "No API key configured"}), 500
1086
+
1087
+ sid = request.args.get("session_id")
1088
+ if not sid or sid not in sessions:
1089
+ return jsonify({"error": "Invalid session_id"}), 400
1090
+
1091
+ s = sessions[sid]
1092
+
1093
+ # Update session with latest crime_type and brief_description if provided
1094
+ crime_type_param = request.args.get("crime_type")
1095
+ brief_description_param = request.args.get("brief_description")
1096
+ if crime_type_param:
1097
+ s["crime_type"] = crime_type_param
1098
+ if brief_description_param is not None:
1099
+ s.setdefault("profile", {})["brief_description"] = brief_description_param
1100
+
1101
+ profile = s.get("profile", {})
1102
+ brief_description = profile.get("brief_description", "No brief description provided.")
1103
+ ctype = s.get("crime_type", "Unknown")
1104
+ history = "\n".join([f"Detective: {r['question']}\nAccused: {r['text']}" for r in s.get("responses", [])[-3:]]) if s.get("responses") else ""
1105
+
1106
+ # Retrieve relevant context from the reference (old/new based on case data)
1107
+ context = retrieve_relevant_context(f"{ctype} investigation", use_new_reference=True)
1108
+
1109
+ prompt = f"""
1110
+ You are Detective Johnson investigating a {ctype.lower()}.
1111
+
1112
+ CASE TYPE: {ctype}
1113
+ BRIEF DESCRIPTION: {brief_description}
1114
+ CONTEXT: {context}
1115
+ HISTORY: {history if history else 'No previous questions.'}
1116
+
1117
+ Your task:
1118
+ Ask ONE short, simple, clear, and high-quality question in plain English (≤25 words) that follows up naturally on the latest answer and case context.
1119
+ Avoid complex language, jargon, or generic questions. Make sure the question is easy to understand and relevant to the investigation.
1120
+ """
1121
+
1122
+ # Call OpenAI API to generate a question based on the context and case details
1123
+ response = requests.post(
1124
+ "https://api.openai.com/v1/chat/completions",
1125
+ headers={"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"},
1126
+ json={"model": "gpt-3.5-turbo", "messages": [
1127
+ {"role": "system", "content": "You are a skilled detective conducting interrogations."},
1128
+ {"role": "user", "content": prompt}
1129
+ ], "temperature": 0.7, "max_tokens": 80}, timeout=60)
1130
+
1131
+ q = response.json()["choices"][0]["message"]["content"].strip()
1132
+ s["last_question"] = q # Store the last generated question
1133
+ return jsonify({"question": q})
1134
+ except Exception as e:
1135
+ import traceback
1136
+ print("Error in /ask_question:", e)
1137
+ traceback.print_exc()
1138
+ return jsonify({"error": str(e)}), 500
1139
+
1140
+ # ------------------------------------------------------------
1141
+ # REAL-TIME FACE STREAM ENDPOINTS
1142
+ # ------------------------------------------------------------
1143
+ @app.route("/face_frame", methods=["POST"])
1144
+ def face_frame():
1145
+ """Receive a single frame (base64 image) and update face metrics for the session.
1146
+ Expected JSON: {"session_id": str, "frame": "data:image/...;base64,...."}
1147
+ Returns latest metrics and a recommended command.
1148
+ """
1149
+ data = request.get_json(silent=True) or {}
1150
+ sid = data.get("session_id")
1151
+ frame_b64 = data.get("frame")
1152
+ if not sid or sid not in sessions:
1153
+ return jsonify({"error": "Invalid session_id"}), 400
1154
+ if not frame_b64 or not isinstance(frame_b64, str):
1155
+ return jsonify({"error": "No frame provided"}), 400
1156
+
1157
+ # Strip possible data URL header
1158
+ if "," in frame_b64:
1159
+ frame_b64 = frame_b64.split(",", 1)[1]
1160
+ try:
1161
+ img_bytes = base64.b64decode(frame_b64)
1162
+ nparr = np.frombuffer(img_bytes, np.uint8)
1163
+ img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
1164
+ except Exception as e:
1165
+ return jsonify({"error": f"Invalid image data: {e}"}), 400
1166
+
1167
+ face_state = sessions[sid].setdefault("face", {})
1168
+ prev_metrics = face_state.get("last_metrics")
1169
+ metrics = analyze_frame(img, previous=prev_metrics)
1170
+ # Simple command logic for MTCNN (can be expanded)
1171
+ cmd = "Proceed with your answer." if metrics.get("face_present") else "Please position your face in the frame and face the camera."
1172
+
1173
+ # Keep a short history and update stats
1174
+ ts = time.time()
1175
+ history = face_state.setdefault("history", [])
1176
+ history.append({"t": ts, **metrics})
1177
+ # Limit history length (keep more frames to cover longer answers)
1178
+ if len(history) > 600:
1179
+ history.pop(0)
1180
+ face_state["last_metrics"] = metrics
1181
+ face_state["last_update"] = ts
1182
+ face_state["last_command"] = cmd
1183
+
1184
+ return jsonify({"metrics": metrics, "command": cmd, "timestamp": ts})
1185
+
1186
+ @app.route("/face_status", methods=["GET"])
1187
+ def face_status():
1188
+ sid = request.args.get("session_id")
1189
+ if not sid or sid not in sessions:
1190
+ return jsonify({"error": "Invalid session_id"}), 400
1191
+ face_state = sessions[sid].get("face", {})
1192
+ metrics = face_state.get("last_metrics")
1193
+ cmd = face_state.get("last_command") or recommend_command(metrics)
1194
+ updated = face_state.get("last_update")
1195
+ return jsonify({
1196
+ "metrics": metrics,
1197
+ "command": cmd,
1198
+ "last_update": updated,
1199
+ "history_len": len(face_state.get("history", []))
1200
+ })
1201
+
1202
+ # ------------------------------------------------------------
1203
+ # BODY LANGUAGE QUERY FUNCTION
1204
+ # ------------------------------------------------------------
1205
def query_body_language_cue(cue_text, top_k=1):
    """Look up the closest body-language reference entry for a cue.

    Encodes *cue_text* with the body-language sentence model, searches the
    FAISS index, and returns the best matching entry, or an error dict when
    the index is unavailable or nothing matches.
    """
    if body_book_index is None or not body_book_entries:
        return {"error": "Body language FAISS index not loaded."}
    query_vec = MODEL_BODY.encode([cue_text]).astype('float32')
    _, neighbor_ids = body_book_index.search(query_vec, k=top_k)
    # Guard against indices past the end of the entry list.
    matches = [body_book_entries[idx] for idx in neighbor_ids[0] if idx < len(body_book_entries)]
    if matches:
        return matches[0]
    return {"error": "No matching body language entry found."}
1217
+
1218
+ # ------------------------------------------------------------
1219
+ # BODY LANGUAGE EXPLANATION API ENDPOINT
1220
+ # ------------------------------------------------------------
1221
+ @app.route("/body_language_explain", methods=["POST"])
1222
+ def body_language_explain():
1223
+ """
1224
+ API endpoint to get body language meaning/explanation for a detected cue.
1225
+ Expects JSON: {"cue": "..."}
1226
+ Returns: {"meaning": ..., "explanation": ...} or error
1227
+ """
1228
+ data = request.get_json(force=True)
1229
+ cue = data.get("cue", "")
1230
+ if not cue:
1231
+ return jsonify({"error": "No cue provided."}), 400
1232
+ result = query_body_language_cue(cue)
1233
+ return jsonify(result)
1234
+
1235
+
1236
+ # ------------------------------------------------------------
1237
+ if __name__ == "__main__":
1238
+ port = int(os.environ.get("PORT", "7860"))
1239
+ print(f"🚀 PyDetect Flask backend running at http://0.0.0.0:{port}")
1240
+ app.run(host="0.0.0.0", port=port, debug=False)
crime_scene_chunks.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb05b6c901c086d0dad3391d0a959a9e68135093ef0d1f6ea493a25cca889e5
3
+ size 79045
crime_scene_index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47e1ae4bc8369955344a5ac0e4e5dddc6a5774242daae8c6fe3c07f300e10d54
3
+ size 106029
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ flask
3
+ flask-cors
4
+ python-dotenv
5
+ werkzeug
6
+ pyodbc
7
+ faiss-cpu
8
+ numpy
9
+ opencv-python-headless
10
+ mtcnn
11
+ fer
12
+ requests
13
+ sentence-transformers
14
+
15
+ # If using SQL Server ODBC driver, install it separately (not via pip)
16
+ # For Windows: https://docs.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server
signin.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pyodbc
3
+ from flask import Flask, request, jsonify
4
+ from werkzeug.security import generate_password_hash, check_password_hash
5
+ from flask_cors import CORS
6
+
7
app = Flask(__name__)

# -----------------------------------------------
# CORS: allow multiple origins via env (optional)
# -----------------------------------------------
# Comma-separated origin list; defaults to "*" (allow everything).
ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "*")
CORS(app, resources={r"/*": {"origins": [o.strip() for o in ALLOWED_ORIGINS.split(",")]}})

# ------------------------------------------------------
# MODE: "local" uses Windows Auth; "server" uses SQL Auth
# Set MODE=server in Hugging Face Variables & secrets
# ------------------------------------------------------
MODE = os.getenv("MODE", "local").lower()

# ---------------------------
# Local (Windows) connection
# ---------------------------
LOCAL_SQL_SERVER = os.getenv("LOCAL_SQL_SERVER", r"localhost\SQLEXPRESS")
LOCAL_SQL_DATABASE = os.getenv("LOCAL_SQL_DATABASE", "PyDetect")
LOCAL_SQL_DRIVER = os.getenv("LOCAL_SQL_DRIVER", "{ODBC Driver 17 for SQL Server}")

# ----------------------------------------
# Remote (HF/AWS RDS) SQL Auth connection
# ----------------------------------------
RDS_SQL_SERVER = os.getenv("RDS_SQL_SERVER", "")  # e.g. mydb.abcxyz.ap-south-1.rds.amazonaws.com,1433
RDS_SQL_DATABASE = os.getenv("RDS_SQL_DATABASE", "PyDetect")
RDS_SQL_USER = os.getenv("RDS_SQL_USER", "")
RDS_SQL_PASSWORD = os.getenv("RDS_SQL_PASSWORD", "")
RDS_SQL_DRIVER = os.getenv("RDS_SQL_DRIVER", "{ODBC Driver 18 for SQL Server}")
RDS_ENCRYPT = os.getenv("RDS_ENCRYPT", "yes")  # yes/no
RDS_TRUST_CERT = os.getenv("RDS_TRUST_SERVER_CERT", "yes")  # yes/no
38
+
39
+ # ======================================================
40
+ # Establishing the database connection using env values
41
+ # (CORE BEHAVIOR UNCHANGED for queries)
42
+ # ======================================================
43
def get_db_connection():
    """Open a pyodbc connection according to the configured MODE.

    "local"  -> Windows Authentication against the local SQL Server;
    anything else -> SQL Authentication against the remote RDS instance.
    """
    if MODE == "local":
        # Windows Authentication (local)
        conn_str = (
            f"DRIVER={LOCAL_SQL_DRIVER};"
            f"SERVER={LOCAL_SQL_SERVER};"
            f"DATABASE={LOCAL_SQL_DATABASE};"
            "Trusted_Connection=yes;"
        )
    else:
        # SQL Authentication (RDS / Hugging Face)
        conn_str = (
            f"DRIVER={RDS_SQL_DRIVER};"
            f"SERVER={RDS_SQL_SERVER};"
            f"DATABASE={RDS_SQL_DATABASE};"
            f"UID={RDS_SQL_USER};PWD={RDS_SQL_PASSWORD};"
            f"Encrypt={RDS_ENCRYPT};TrustServerCertificate={RDS_TRUST_CERT};"
            "Connection Timeout=30;"
        )
    return pyodbc.connect(conn_str)
64
+
65
+ # ======================================================
66
+ # Create the User table only on local
67
+ # (CORE CREATE SQL KEPT THE SAME)
68
+ # ======================================================
69
def create_user_table():
    """Create the Users table if it does not exist (local-mode bootstrap).

    Fix: the cursor and connection are now closed even when the DDL or
    commit fails, instead of leaking on exception.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''
                IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='Users' AND xtype='U')
                CREATE TABLE Users (
                    id INT IDENTITY(1,1) PRIMARY KEY,
                    name NVARCHAR(120) NOT NULL,
                    role NVARCHAR(50) NOT NULL,
                    email NVARCHAR(120) UNIQUE NOT NULL,
                    password NVARCHAR(255) NOT NULL
                )
            ''')
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
85
+
86
# Initialize the table on startup ONLY IF local
# (remote/RDS schema is expected to be provisioned out-of-band).
if MODE == "local":
    create_user_table()
89
+
90
+ # ===========================
91
+ # DO NOT CHANGE: API ROUTES
92
+ # ===========================
93
@app.route('/sign-in', methods=['POST'])
def sign_in():
    """Authenticate by email/password.

    Fixes: tolerate a missing/non-JSON body, guard against a missing
    password before hashing, and always release the DB cursor/connection
    (the original never closed them, leaking one connection per request).
    """
    data = request.get_json(silent=True) or {}
    email = data.get('email')
    password = data.get('password')

    # Find user by email
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('SELECT * FROM Users WHERE email = ?', (email,))
            user = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        conn.close()

    if user:
        # Check if the provided password matches the hashed password stored in the database
        if password is not None and check_password_hash(user[4], password):  # user[4] is the password field
            return jsonify({"message": "Login successful"}), 200
        else:
            return jsonify({"message": "Invalid email or password"}), 401
    else:
        return jsonify({"message": "Email not found"}), 404
113
+
114
+
115
@app.route('/sign-up', methods=['POST'])
def sign_up():
    """Register a new user.

    Fixes: the DB connection/cursor are now closed on the "email already
    in use" path and on errors (the original returned early without any
    cleanup), and a missing/non-JSON body no longer raises.
    """
    data = request.get_json(silent=True) or {}
    print("Received sign-up data:", data)  # Log received data
    name = data.get('name')
    role = data.get('role')
    email = data.get('email')
    password = data.get('password')

    # Check if email is valid
    if not email or not password:
        return jsonify({"message": "Email and password are required"}), 400

    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            # Check if the email already exists
            cursor.execute('SELECT * FROM Users WHERE email = ?', (email,))
            if cursor.fetchone():
                return jsonify({"message": "Email already in use"}), 400

            # Hash the password before saving it
            hashed_password = generate_password_hash(password)

            # Insert the new user into the Users table
            cursor.execute('INSERT INTO Users (name, role, email, password) VALUES (?, ?, ?, ?)',
                           (name, role, email, hashed_password))
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    print("User created successfully:", name, email)  # Log successful user creation
    return jsonify({"message": "User created successfully"}), 201
147
+
148
if __name__ == '__main__':
    # Default to 5000; if PORT is set (e.g., by Hugging Face), use it
    listen_port = int(os.getenv("PORT", "5000"))
    app.run(host="0.0.0.0", port=listen_port, debug=False)
what-everybody-is-saying-meta.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7cba71bc02b5ab52471e3370b4a26e41dee19ae69aa861bc9f5b3251f279a59
3
+ size 444180
what-everybody-is-saying.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fab95688745967c770668e6650535bd14d0c0e5baffebd389647b27edb07b76
3
+ size 990765