Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,21 +1,32 @@
|
|
| 1 |
"""
|
| 2 |
-
main.py — Iris AI Service (v1.
|
| 3 |
|
| 4 |
AI layer for the Iris Support Portal (IrisPlus / Unified Spark Desk).
|
| 5 |
Deployed as a HuggingFace Space monofile (Flask + Gemini + AssemblyAI + Firebase).
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
FEATURES:
|
| 8 |
-
1. WhatsApp Export → Knowledge Base (
|
| 9 |
2. Bulk KB Upload (CSV / Excel / PDF)
|
| 10 |
-
3. Natural Language + Voice Ticket Submission
|
| 11 |
-
4. System Tutorial Ingestion (
|
| 12 |
-
5. Agent NL/Voice Solution Writing
|
| 13 |
-
6. Iris Chatbot (KB
|
| 14 |
|
| 15 |
ENV VARS:
|
| 16 |
GOOGLE_API_KEY — Gemini API key
|
| 17 |
ASSEMBLYAI_API_KEY — AssemblyAI API key
|
| 18 |
FIREBASE — JSON string of Firebase service account
|
|
|
|
| 19 |
PORT — Server port (default 7860)
|
| 20 |
"""
|
| 21 |
|
|
@@ -27,8 +38,10 @@ import time
|
|
| 27 |
import logging
|
| 28 |
import base64
|
| 29 |
import hashlib
|
|
|
|
|
|
|
| 30 |
from datetime import datetime, timezone
|
| 31 |
-
from typing import Any, Dict, List, Optional
|
| 32 |
|
| 33 |
import requests
|
| 34 |
from flask import Flask, request, jsonify
|
|
@@ -52,7 +65,8 @@ except Exception as e:
|
|
| 52 |
logger.error("google-genai not installed: %s", e)
|
| 53 |
|
| 54 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
_gemini_client = None
|
| 58 |
if genai and GOOGLE_API_KEY:
|
|
@@ -99,7 +113,7 @@ def init_firestore() -> Optional[Any]:
|
|
| 99 |
|
| 100 |
db = init_firestore()
|
| 101 |
|
| 102 |
-
# ─── Optional
|
| 103 |
|
| 104 |
try:
|
| 105 |
import pandas as pd
|
|
@@ -119,28 +133,98 @@ app = Flask(__name__)
|
|
| 119 |
CORS(app)
|
| 120 |
|
| 121 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 122 |
-
# HELPERS
|
| 123 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
def _safe_json(text: str, fallback: Any) -> Any:
|
| 126 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
try:
|
| 128 |
-
clean = text.strip()
|
| 129 |
-
if "```json" in clean:
|
| 130 |
-
clean = clean.split("```json")[1].split("```")[0]
|
| 131 |
-
elif "```" in clean:
|
| 132 |
-
clean = clean.split("```")[1].split("```")[0]
|
| 133 |
return json.loads(clean)
|
| 134 |
-
except
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
def _gemini_text(prompt: str, json_mode: bool = False) -> str:
|
| 140 |
-
"""Call Gemini
|
| 141 |
if not _gemini_client:
|
| 142 |
return ""
|
| 143 |
-
cfg = genai_types.GenerateContentConfig(
|
|
|
|
|
|
|
| 144 |
try:
|
| 145 |
resp = _gemini_client.models.generate_content(
|
| 146 |
model=GEMINI_MODEL,
|
|
@@ -149,18 +233,35 @@ def _gemini_text(prompt: str, json_mode: bool = False) -> str:
|
|
| 149 |
)
|
| 150 |
return resp.text or ""
|
| 151 |
except Exception as e:
|
| 152 |
-
logger.error("Gemini call error: %s", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
return ""
|
| 154 |
|
| 155 |
|
| 156 |
def _article_fingerprint(title: str, content: str) -> str:
|
| 157 |
-
"""Stable hash to detect duplicate KB articles."""
|
| 158 |
raw = f"{title.strip().lower()}::{content.strip().lower()[:300]}"
|
| 159 |
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
| 160 |
|
| 161 |
|
| 162 |
def _get_existing_fingerprints() -> set:
|
| 163 |
-
"""Fetch all fingerprints already in Firestore KB."""
|
| 164 |
if not db:
|
| 165 |
return set()
|
| 166 |
try:
|
|
@@ -172,22 +273,17 @@ def _get_existing_fingerprints() -> set:
|
|
| 172 |
|
| 173 |
|
| 174 |
def _save_kb_articles(articles: List[Dict], source_label: str) -> Dict:
|
| 175 |
-
"""Save articles to Firestore, skip duplicates. Returns stats."""
|
| 176 |
if not db:
|
| 177 |
return {"saved": 0, "skipped": 0, "error": "Firebase unavailable"}
|
| 178 |
-
|
| 179 |
existing = _get_existing_fingerprints()
|
| 180 |
saved, skipped = 0, 0
|
| 181 |
-
|
| 182 |
for article in articles:
|
| 183 |
title = article.get("title", "Untitled")
|
| 184 |
content = article.get("content", "")
|
| 185 |
fp = _article_fingerprint(title, content)
|
| 186 |
-
|
| 187 |
if fp in existing:
|
| 188 |
skipped += 1
|
| 189 |
continue
|
| 190 |
-
|
| 191 |
doc = {
|
| 192 |
"title": title,
|
| 193 |
"content": content,
|
|
@@ -197,79 +293,327 @@ def _save_kb_articles(articles: List[Dict], source_label: str) -> Dict:
|
|
| 197 |
"fingerprint": fp,
|
| 198 |
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 199 |
}
|
| 200 |
-
# Carry timestamp crop info from tutorial ingestion if present
|
| 201 |
if article.get("timestamp_start") is not None:
|
| 202 |
doc["timestamp_start"] = article["timestamp_start"]
|
| 203 |
doc["timestamp_end"] = article.get("timestamp_end")
|
| 204 |
doc["video_url"] = article.get("video_url", "")
|
| 205 |
-
|
| 206 |
db.collection("iris_kb_articles").add(doc)
|
| 207 |
existing.add(fp)
|
| 208 |
saved += 1
|
| 209 |
-
|
| 210 |
return {"saved": saved, "skipped": skipped}
|
| 211 |
|
| 212 |
|
| 213 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 214 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 216 |
|
| 217 |
-
WHATSAPP_EXTRACTION_PROMPT = """
|
| 218 |
-
|
|
|
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
-
Return a
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
"content": "Full explanation: what the problem was and the step-by-step solution",
|
| 233 |
-
"category": "One of: Account, Billing, Technical, Feature, Other",
|
| 234 |
-
"tags": ["array", "of", "relevant", "keywords"]
|
| 235 |
-
}
|
| 236 |
|
| 237 |
-
|
|
|
|
| 238 |
|
| 239 |
-
|
|
|
|
|
|
|
| 240 |
"""
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
@app.post("/api/kb/whatsapp-import")
|
| 243 |
def whatsapp_import():
|
| 244 |
"""
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
"""
|
| 248 |
-
|
| 249 |
-
|
| 250 |
|
| 251 |
-
|
| 252 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
|
| 257 |
-
|
|
|
|
|
|
|
| 258 |
|
| 259 |
-
|
| 260 |
-
|
| 261 |
|
| 262 |
-
|
| 263 |
-
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
return jsonify({
|
| 269 |
-
"ok":
|
| 270 |
-
"articles_found":
|
| 271 |
-
"saved":
|
| 272 |
-
"skipped_dupes":
|
| 273 |
})
|
| 274 |
|
| 275 |
|
|
@@ -278,7 +622,6 @@ def whatsapp_import():
|
|
| 278 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 279 |
|
| 280 |
def _extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
|
| 281 |
-
"""Extract text from a PDF using pypdf, fallback to Gemini vision."""
|
| 282 |
if PYPDF_AVAILABLE:
|
| 283 |
try:
|
| 284 |
reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
|
|
@@ -288,12 +631,9 @@ def _extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
|
|
| 288 |
return text
|
| 289 |
except Exception as e:
|
| 290 |
logger.warning("pypdf extraction failed: %s", e)
|
| 291 |
-
|
| 292 |
-
# Gemini inline_data fallback for scanned PDFs
|
| 293 |
if _gemini_client:
|
| 294 |
try:
|
| 295 |
-
|
| 296 |
-
resp = _gemini_client.models.generate_content(
|
| 297 |
model=GEMINI_MODEL,
|
| 298 |
contents=[
|
| 299 |
"Extract all text from this PDF document. Return plain text only.",
|
|
@@ -306,61 +646,41 @@ def _extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
|
|
| 306 |
return ""
|
| 307 |
|
| 308 |
|
| 309 |
-
PDF_KB_PROMPT = """
|
| 310 |
-
You are a support knowledge base curator.
|
| 311 |
Convert the following document content into structured KB articles.
|
| 312 |
-
Each article
|
| 313 |
|
| 314 |
-
Return a
|
| 315 |
-
|
| 316 |
-
"title": "Short, searchable title",
|
| 317 |
-
"content": "Complete explanation in clear language",
|
| 318 |
-
"category": "One of: Account, Billing, Technical, Feature, Other",
|
| 319 |
-
"tags": ["keyword1", "keyword2"]
|
| 320 |
-
}
|
| 321 |
|
| 322 |
-
|
|
|
|
| 323 |
|
| 324 |
Document content:
|
| 325 |
"""
|
| 326 |
|
| 327 |
@app.post("/api/kb/bulk-upload")
|
| 328 |
def bulk_upload():
|
| 329 |
-
"""
|
| 330 |
-
Accepts multipart file upload. Supports: .csv, .xlsx, .xls, .pdf
|
| 331 |
-
CSV/Excel expected columns: title, content (+ optional: category, tags)
|
| 332 |
-
PDF: Gemini extracts and structures articles.
|
| 333 |
-
"""
|
| 334 |
if "file" not in request.files:
|
| 335 |
return jsonify({"ok": False, "error": "No file uploaded"}), 400
|
| 336 |
-
|
| 337 |
f = request.files["file"]
|
| 338 |
filename = f.filename or ""
|
| 339 |
ext = filename.rsplit(".", 1)[-1].lower()
|
| 340 |
file_data = f.read()
|
| 341 |
-
|
| 342 |
-
articles = []
|
| 343 |
|
| 344 |
if ext in ("csv", "xlsx", "xls"):
|
| 345 |
if not PANDAS_AVAILABLE:
|
| 346 |
return jsonify({"ok": False, "error": "pandas not installed on server"}), 500
|
| 347 |
try:
|
| 348 |
-
if ext == "csv"
|
| 349 |
-
df = pd.read_csv(io.BytesIO(file_data))
|
| 350 |
-
else:
|
| 351 |
-
df = pd.read_excel(io.BytesIO(file_data))
|
| 352 |
-
|
| 353 |
df.columns = [c.strip().lower() for c in df.columns]
|
| 354 |
-
|
| 355 |
if "title" not in df.columns or "content" not in df.columns:
|
| 356 |
return jsonify({"ok": False, "error": "CSV/Excel must have 'title' and 'content' columns"}), 400
|
| 357 |
-
|
| 358 |
for _, row in df.iterrows():
|
| 359 |
tags = []
|
| 360 |
if "tags" in df.columns and pd.notna(row.get("tags")):
|
| 361 |
-
|
| 362 |
-
tags = [t.strip() for t in re.split(r"[,;|]", raw_tags) if t.strip()]
|
| 363 |
-
|
| 364 |
articles.append({
|
| 365 |
"title": str(row["title"]).strip(),
|
| 366 |
"content": str(row["content"]).strip(),
|
|
@@ -368,100 +688,75 @@ def bulk_upload():
|
|
| 368 |
"tags": tags,
|
| 369 |
})
|
| 370 |
except Exception as e:
|
| 371 |
-
logger.error("Spreadsheet parse error: %s", e)
|
| 372 |
return jsonify({"ok": False, "error": f"Could not parse file: {e}"}), 400
|
| 373 |
|
| 374 |
elif ext == "pdf":
|
| 375 |
text = _extract_text_from_pdf_bytes(file_data)
|
| 376 |
if not text:
|
| 377 |
return jsonify({"ok": False, "error": "Could not extract text from PDF"}), 400
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
articles
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
return jsonify({"ok": False, "error": "Gemini PDF structuring failed"}), 500
|
| 384 |
else:
|
| 385 |
-
return jsonify({"ok": False, "error": f"Unsupported file type
|
| 386 |
|
| 387 |
if not articles:
|
| 388 |
return jsonify({"ok": False, "error": "No articles extracted from file"}), 400
|
| 389 |
|
| 390 |
stats = _save_kb_articles(articles, source_label=f"bulk_upload:{filename}")
|
| 391 |
-
return jsonify({
|
| 392 |
-
"ok": True,
|
| 393 |
-
"articles_found": len(articles),
|
| 394 |
-
"saved": stats["saved"],
|
| 395 |
-
"skipped_dupes": stats["skipped"],
|
| 396 |
-
})
|
| 397 |
|
| 398 |
|
| 399 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 400 |
# FEATURE 3 — Ticket Submission via NL Text or Voice
|
| 401 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 402 |
|
| 403 |
-
TICKET_EXTRACTION_PROMPT = """
|
| 404 |
-
You are a support ticket intake system for a software support portal.
|
| 405 |
|
| 406 |
A user has described their issue in natural language. Extract structured ticket fields.
|
| 407 |
|
| 408 |
-
Return
|
| 409 |
-
|
| 410 |
-
"title": "Concise ticket title (max 80 chars)",
|
| 411 |
-
"description": "Full detailed description of the issue, rewritten clearly in third person",
|
| 412 |
-
"category_hint": "Best matching category: Account | Billing | Technical | Feature | Other",
|
| 413 |
-
"priority_hint": "One of: low | medium | high | critical (based on urgency language)",
|
| 414 |
-
"keywords": ["array", "of", "technical", "keywords"]
|
| 415 |
-
}
|
| 416 |
|
| 417 |
-
|
|
|
|
|
|
|
|
|
|
| 418 |
"""
|
| 419 |
|
| 420 |
def _transcribe_audio_assemblyai(audio_b64: str, audio_format: str = "wav") -> str:
|
| 421 |
-
"""Upload audio to AssemblyAI and poll for transcript."""
|
| 422 |
if not ASSEMBLYAI_API_KEY:
|
| 423 |
return ""
|
| 424 |
-
|
| 425 |
audio_bytes = base64.b64decode(audio_b64)
|
| 426 |
headers = {"authorization": ASSEMBLYAI_API_KEY}
|
| 427 |
-
|
| 428 |
-
# 1. Upload
|
| 429 |
try:
|
| 430 |
upload_resp = requests.post(
|
| 431 |
f"{ASSEMBLYAI_BASE}/upload",
|
| 432 |
headers={**headers, "Content-Type": "application/octet-stream"},
|
| 433 |
-
data=audio_bytes,
|
| 434 |
-
timeout=30
|
| 435 |
)
|
| 436 |
upload_resp.raise_for_status()
|
| 437 |
upload_url = upload_resp.json().get("upload_url")
|
| 438 |
except Exception as e:
|
| 439 |
logger.error("AssemblyAI upload error: %s", e)
|
| 440 |
return ""
|
| 441 |
-
|
| 442 |
-
# 2. Request transcript
|
| 443 |
try:
|
| 444 |
tx_resp = requests.post(
|
| 445 |
f"{ASSEMBLYAI_BASE}/transcript",
|
| 446 |
headers={**headers, "Content-Type": "application/json"},
|
| 447 |
-
json={"audio_url": upload_url, "language_detection": True},
|
| 448 |
-
timeout=15
|
| 449 |
)
|
| 450 |
tx_resp.raise_for_status()
|
| 451 |
tx_id = tx_resp.json().get("id")
|
| 452 |
except Exception as e:
|
| 453 |
logger.error("AssemblyAI transcript request error: %s", e)
|
| 454 |
return ""
|
| 455 |
-
|
| 456 |
-
# 3. Poll
|
| 457 |
for _ in range(30):
|
| 458 |
time.sleep(3)
|
| 459 |
try:
|
| 460 |
-
poll
|
| 461 |
-
f"{ASSEMBLYAI_BASE}/transcript/{tx_id}",
|
| 462 |
-
headers=headers,
|
| 463 |
-
timeout=15
|
| 464 |
-
)
|
| 465 |
poll.raise_for_status()
|
| 466 |
result = poll.json()
|
| 467 |
status = result.get("status")
|
|
@@ -477,74 +772,47 @@ def _transcribe_audio_assemblyai(audio_b64: str, audio_format: str = "wav") -> s
|
|
| 477 |
|
| 478 |
@app.post("/api/tickets/submit-nl")
|
| 479 |
def submit_ticket_nl():
|
| 480 |
-
"""
|
| 481 |
-
POST body: { "message": "I can't log in, it says my account is locked...", "user_id": "..." }
|
| 482 |
-
Returns structured ticket fields for the frontend to pre-fill and submit.
|
| 483 |
-
"""
|
| 484 |
body = request.get_json(silent=True) or {}
|
| 485 |
message = body.get("message", "").strip()
|
| 486 |
user_id = body.get("user_id", "anonymous")
|
| 487 |
-
|
| 488 |
if not message:
|
| 489 |
return jsonify({"ok": False, "error": "message is required"}), 400
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
if not ticket.get("title"):
|
| 495 |
return jsonify({"ok": False, "error": "Could not extract ticket info from message"}), 500
|
| 496 |
-
|
| 497 |
-
# Log submission attempt
|
| 498 |
if db:
|
| 499 |
db.collection("iris_ai_ticket_drafts").add({
|
| 500 |
-
"user_id":
|
| 501 |
-
"
|
| 502 |
-
"extracted": ticket,
|
| 503 |
-
"channel": "nl_text",
|
| 504 |
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 505 |
})
|
| 506 |
-
|
| 507 |
return jsonify({"ok": True, "ticket": ticket})
|
| 508 |
|
| 509 |
|
| 510 |
@app.post("/api/tickets/submit-voice")
|
| 511 |
def submit_ticket_voice():
|
| 512 |
-
"""
|
| 513 |
-
POST body: { "audio_b64": "<base64 audio>", "audio_format": "wav", "user_id": "..." }
|
| 514 |
-
Transcribes audio via AssemblyAI, then extracts ticket via Gemini.
|
| 515 |
-
"""
|
| 516 |
body = request.get_json(silent=True) or {}
|
| 517 |
audio_b64 = body.get("audio_b64", "")
|
| 518 |
audio_format = body.get("audio_format", "wav")
|
| 519 |
user_id = body.get("user_id", "anonymous")
|
| 520 |
-
|
| 521 |
if not audio_b64:
|
| 522 |
return jsonify({"ok": False, "error": "audio_b64 is required"}), 400
|
| 523 |
-
|
| 524 |
if not ASSEMBLYAI_API_KEY:
|
| 525 |
return jsonify({"ok": False, "error": "AssemblyAI not configured on server"}), 500
|
| 526 |
-
|
| 527 |
-
logger.info("Voice ticket: transcribing audio for user=%s", user_id)
|
| 528 |
transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)
|
| 529 |
-
|
| 530 |
if not transcript:
|
| 531 |
return jsonify({"ok": False, "error": "Transcription failed or returned empty result"}), 500
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
if not ticket.get("title"):
|
| 537 |
return jsonify({"ok": False, "error": "Could not extract ticket info from transcript"}), 500
|
| 538 |
-
|
| 539 |
if db:
|
| 540 |
db.collection("iris_ai_ticket_drafts").add({
|
| 541 |
-
"user_id":
|
| 542 |
-
"
|
| 543 |
-
"extracted": ticket,
|
| 544 |
-
"channel": "voice",
|
| 545 |
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 546 |
})
|
| 547 |
-
|
| 548 |
return jsonify({"ok": True, "transcript": transcript, "ticket": ticket})
|
| 549 |
|
| 550 |
|
|
@@ -552,32 +820,22 @@ def submit_ticket_voice():
|
|
| 552 |
# FEATURE 4 — System Tutorial Ingestion
|
| 553 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 554 |
|
| 555 |
-
TUTORIAL_EXTRACTION_PROMPT = """
|
| 556 |
-
You are a knowledge base curator for a software support system.
|
| 557 |
|
| 558 |
-
You have
|
| 559 |
-
|
| 560 |
|
| 561 |
-
|
|
|
|
| 562 |
|
| 563 |
-
|
| 564 |
-
{
|
| 565 |
-
"title": "How to <do something> in Iris",
|
| 566 |
-
"content": "Step-by-step instructions based on the tutorial",
|
| 567 |
-
"category": "One of: Account | Tickets | Agents | Reports | Admin | Other",
|
| 568 |
-
"tags": ["keyword1", "keyword2"],
|
| 569 |
-
"timestamp_start": <seconds as integer>,
|
| 570 |
-
"timestamp_end": <seconds as integer>
|
| 571 |
-
}
|
| 572 |
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
Transcript (with timestamps in [MM:SS] or [HH:MM:SS] format):
|
| 576 |
"""
|
| 577 |
|
| 578 |
def _parse_timestamp_to_seconds(ts: str) -> int:
|
| 579 |
-
|
| 580 |
-
parts = ts.strip("[]").split(":")
|
| 581 |
try:
|
| 582 |
if len(parts) == 2:
|
| 583 |
return int(parts[0]) * 60 + int(parts[1])
|
|
@@ -587,169 +845,101 @@ def _parse_timestamp_to_seconds(ts: str) -> int:
|
|
| 587 |
pass
|
| 588 |
return 0
|
| 589 |
|
| 590 |
-
|
| 591 |
@app.post("/api/kb/tutorial-ingest")
|
| 592 |
def tutorial_ingest():
|
| 593 |
-
"""
|
| 594 |
-
POST body: {
|
| 595 |
-
"transcript": "<timestamped transcript text>",
|
| 596 |
-
"video_url": "https://...", (optional, for linking crop timestamps)
|
| 597 |
-
"video_title": "Getting Started with Iris"
|
| 598 |
-
}
|
| 599 |
-
Gemini extracts how-to articles with timestamp ranges.
|
| 600 |
-
"""
|
| 601 |
body = request.get_json(silent=True) or {}
|
| 602 |
transcript = body.get("transcript", "").strip()
|
| 603 |
video_url = body.get("video_url", "")
|
| 604 |
video_title = body.get("video_title", "Tutorial")
|
| 605 |
-
|
| 606 |
if not transcript:
|
| 607 |
return jsonify({"ok": False, "error": "transcript is required"}), 400
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
if not isinstance(articles, list):
|
| 615 |
-
return jsonify({"ok": False, "error": "Gemini returned unexpected format"}), 500
|
| 616 |
-
|
| 617 |
-
# Inject video metadata into each article
|
| 618 |
for a in articles:
|
| 619 |
a["video_url"] = video_url
|
| 620 |
a["video_title"] = video_title
|
| 621 |
-
# Ensure numeric seconds (Gemini may return the parsed value; validate it)
|
| 622 |
for ts_key in ("timestamp_start", "timestamp_end"):
|
| 623 |
val = a.get(ts_key)
|
| 624 |
if isinstance(val, str):
|
| 625 |
a[ts_key] = _parse_timestamp_to_seconds(val)
|
| 626 |
elif not isinstance(val, int):
|
| 627 |
a[ts_key] = 0
|
| 628 |
-
|
| 629 |
stats = _save_kb_articles(articles, source_label=f"tutorial:{video_title}")
|
| 630 |
-
|
| 631 |
-
return jsonify({
|
| 632 |
-
"ok": True,
|
| 633 |
-
"video_title": video_title,
|
| 634 |
-
"articles_found": len(articles),
|
| 635 |
-
"saved": stats["saved"],
|
| 636 |
-
"skipped_dupes": stats["skipped"],
|
| 637 |
-
})
|
| 638 |
|
| 639 |
|
| 640 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 641 |
# FEATURE 5 — Agent Solution Writing (NL Text + Voice)
|
| 642 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 643 |
|
| 644 |
-
SOLUTION_EXTRACTION_PROMPT = """
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
An agent or support staff has described a solution they discovered while resolving a ticket.
|
| 648 |
Structure this into a reusable KB article.
|
| 649 |
|
| 650 |
-
Return
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
"tags": ["relevant", "keywords"]
|
| 656 |
-
}
|
| 657 |
|
| 658 |
-
Agent
|
| 659 |
"""
|
| 660 |
|
| 661 |
@app.post("/api/kb/agent-solution-nl")
|
| 662 |
def agent_solution_nl():
|
| 663 |
-
"""
|
| 664 |
-
POST body: { "message": "I fixed ticket #123 by...", "agent_id": "...", "ticket_id": "..." }
|
| 665 |
-
Creates a KB article from agent's natural language solution description.
|
| 666 |
-
"""
|
| 667 |
body = request.get_json(silent=True) or {}
|
| 668 |
message = body.get("message", "").strip()
|
| 669 |
agent_id = body.get("agent_id", "unknown")
|
| 670 |
ticket_id = body.get("ticket_id", "")
|
| 671 |
-
|
| 672 |
if not message:
|
| 673 |
return jsonify({"ok": False, "error": "message is required"}), 400
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
if not article.get("title"):
|
| 679 |
return jsonify({"ok": False, "error": "Could not structure solution"}), 500
|
| 680 |
-
|
| 681 |
-
# Add ticket reference tag
|
| 682 |
if ticket_id:
|
| 683 |
article.setdefault("tags", []).append(f"ticket:{ticket_id}")
|
| 684 |
-
|
| 685 |
stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")
|
| 686 |
-
|
| 687 |
-
return jsonify({
|
| 688 |
-
"ok": True,
|
| 689 |
-
"saved": stats["saved"],
|
| 690 |
-
"article": article,
|
| 691 |
-
})
|
| 692 |
|
| 693 |
|
| 694 |
@app.post("/api/kb/agent-solution-voice")
|
| 695 |
def agent_solution_voice():
|
| 696 |
-
"""
|
| 697 |
-
POST body: { "audio_b64": "...", "audio_format": "wav", "agent_id": "...", "ticket_id": "..." }
|
| 698 |
-
Transcribes agent's voice note, structures into KB article.
|
| 699 |
-
"""
|
| 700 |
body = request.get_json(silent=True) or {}
|
| 701 |
audio_b64 = body.get("audio_b64", "")
|
| 702 |
audio_format = body.get("audio_format", "wav")
|
| 703 |
agent_id = body.get("agent_id", "unknown")
|
| 704 |
ticket_id = body.get("ticket_id", "")
|
| 705 |
-
|
| 706 |
if not audio_b64:
|
| 707 |
return jsonify({"ok": False, "error": "audio_b64 is required"}), 400
|
| 708 |
-
|
| 709 |
transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)
|
| 710 |
-
|
| 711 |
if not transcript:
|
| 712 |
return jsonify({"ok": False, "error": "Transcription failed"}), 500
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
if not article.get("title"):
|
| 718 |
return jsonify({"ok": False, "error": "Could not structure solution from transcript"}), 500
|
| 719 |
-
|
| 720 |
if ticket_id:
|
| 721 |
article.setdefault("tags", []).append(f"ticket:{ticket_id}")
|
| 722 |
-
|
| 723 |
stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")
|
| 724 |
-
|
| 725 |
-
return jsonify({
|
| 726 |
-
"ok": True,
|
| 727 |
-
"transcript": transcript,
|
| 728 |
-
"saved": stats["saved"],
|
| 729 |
-
"article": article,
|
| 730 |
-
})
|
| 731 |
|
| 732 |
|
| 733 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 734 |
-
# FEATURE 6 — Iris
|
| 735 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 736 |
|
| 737 |
def _search_kb(query: str, limit: int = 5) -> List[Dict]:
|
| 738 |
-
"""
|
| 739 |
-
Simple keyword search over Firestore KB articles.
|
| 740 |
-
Production upgrade: swap with a vector DB (e.g. Qdrant) or Vertex AI Search.
|
| 741 |
-
"""
|
| 742 |
if not db:
|
| 743 |
return []
|
| 744 |
-
|
| 745 |
query_terms = [t.lower() for t in query.split() if len(t) > 2]
|
| 746 |
-
|
| 747 |
try:
|
| 748 |
-
# Fetch recent articles (Firestore doesn't support full-text, this is a lightweight approach)
|
| 749 |
docs = db.collection("iris_kb_articles").order_by(
|
| 750 |
"created_at", direction=firestore.Query.DESCENDING
|
| 751 |
).limit(200).stream()
|
| 752 |
-
|
| 753 |
results = []
|
| 754 |
for doc in docs:
|
| 755 |
d = doc.to_dict()
|
|
@@ -757,60 +947,40 @@ def _search_kb(query: str, limit: int = 5) -> List[Dict]:
|
|
| 757 |
score = sum(1 for term in query_terms if term in text)
|
| 758 |
if score > 0:
|
| 759 |
results.append({"score": score, **d})
|
| 760 |
-
|
| 761 |
results.sort(key=lambda x: x["score"], reverse=True)
|
| 762 |
return results[:limit]
|
| 763 |
-
|
| 764 |
except Exception as e:
|
| 765 |
logger.error("KB search error: %s", e)
|
| 766 |
return []
|
| 767 |
|
| 768 |
|
| 769 |
-
CHATBOT_SYSTEM_PROMPT = """
|
| 770 |
-
You are Iris, an intelligent support assistant for the Iris Support Portal.
|
| 771 |
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
- If the answer is in a tutorial with a timestamp, mention the video and timestamp so the user can jump to that moment.
|
| 777 |
-
- Be concise, clear, and friendly.
|
| 778 |
-
- If you cannot find the answer, say so honestly and suggest submitting a ticket.
|
| 779 |
-
- Format step-by-step answers as numbered lists.
|
| 780 |
"""
|
| 781 |
|
| 782 |
@app.post("/api/chatbot/query")
|
| 783 |
def chatbot_query():
|
| 784 |
-
"""
|
| 785 |
-
POST body: {
|
| 786 |
-
"message": "How do I reset a user's password?",
|
| 787 |
-
"session_id": "...",
|
| 788 |
-
"user_id": "..."
|
| 789 |
-
}
|
| 790 |
-
RAG: searches KB, then uses Gemini to synthesize an answer.
|
| 791 |
-
"""
|
| 792 |
body = request.get_json(silent=True) or {}
|
| 793 |
message = body.get("message", "").strip()
|
| 794 |
session_id = body.get("session_id", "default")
|
| 795 |
user_id = body.get("user_id", "anonymous")
|
| 796 |
-
|
| 797 |
if not message:
|
| 798 |
return jsonify({"ok": False, "error": "message is required"}), 400
|
| 799 |
-
|
| 800 |
-
# Retrieve relevant KB context
|
| 801 |
kb_results = _search_kb(message, limit=5)
|
| 802 |
-
|
| 803 |
context_blocks = []
|
| 804 |
sources = []
|
| 805 |
for r in kb_results:
|
| 806 |
block = f"[Article: {r.get('title')}]\n{r.get('content', '')}"
|
| 807 |
if r.get("timestamp_start") is not None:
|
| 808 |
-
ts
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
block +=
|
| 813 |
-
block += f" — {url})" if url else ")"
|
| 814 |
context_blocks.append(block)
|
| 815 |
sources.append({
|
| 816 |
"title": r.get("title"),
|
|
@@ -819,75 +989,43 @@ def chatbot_query():
|
|
| 819 |
"ts_start": r.get("timestamp_start"),
|
| 820 |
"video_url": r.get("video_url"),
|
| 821 |
})
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
full_prompt = f"""{CHATBOT_SYSTEM_PROMPT}
|
| 826 |
-
|
| 827 |
-
KNOWLEDGE BASE CONTEXT:
|
| 828 |
-
{context_str}
|
| 829 |
-
|
| 830 |
-
USER QUESTION: {message}
|
| 831 |
-
|
| 832 |
-
Answer:"""
|
| 833 |
-
|
| 834 |
-
answer = _gemini_text(full_prompt)
|
| 835 |
-
|
| 836 |
if not answer:
|
| 837 |
-
answer = "
|
| 838 |
-
|
| 839 |
-
# Persist chat log
|
| 840 |
if db:
|
| 841 |
db.collection("iris_chatbot_logs").add({
|
| 842 |
-
"user_id":
|
| 843 |
-
"
|
| 844 |
-
"message": message,
|
| 845 |
-
"answer": answer,
|
| 846 |
-
"sources": sources,
|
| 847 |
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 848 |
})
|
| 849 |
-
|
| 850 |
-
return jsonify({
|
| 851 |
-
"ok": True,
|
| 852 |
-
"answer": answer,
|
| 853 |
-
"sources": sources,
|
| 854 |
-
})
|
| 855 |
|
| 856 |
|
| 857 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 858 |
-
# KB READ
|
| 859 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 860 |
|
| 861 |
@app.get("/api/kb/articles")
|
| 862 |
def list_kb_articles():
|
| 863 |
-
"""
|
| 864 |
-
GET /api/kb/articles?category=Technical&limit=50
|
| 865 |
-
Lists KB articles, optionally filtered by category.
|
| 866 |
-
"""
|
| 867 |
category = request.args.get("category", "")
|
| 868 |
limit = int(request.args.get("limit", 50))
|
| 869 |
-
|
| 870 |
if not db:
|
| 871 |
return jsonify({"ok": False, "error": "Firebase unavailable"}), 500
|
| 872 |
-
|
| 873 |
try:
|
| 874 |
-
query = db.collection("iris_kb_articles").order_by(
|
| 875 |
-
"created_at", direction=firestore.Query.DESCENDING
|
| 876 |
-
)
|
| 877 |
if category:
|
| 878 |
query = query.where("category", "==", category)
|
| 879 |
-
|
| 880 |
docs = query.limit(limit).stream()
|
| 881 |
articles = [{"id": d.id, **d.to_dict()} for d in docs]
|
| 882 |
return jsonify({"ok": True, "articles": articles, "count": len(articles)})
|
| 883 |
except Exception as e:
|
| 884 |
-
logger.error("KB list error: %s", e)
|
| 885 |
return jsonify({"ok": False, "error": str(e)}), 500
|
| 886 |
|
| 887 |
|
| 888 |
@app.delete("/api/kb/articles/<article_id>")
|
| 889 |
def delete_kb_article(article_id: str):
|
| 890 |
-
"""DELETE /api/kb/articles/<id> — Admin only (JWT check to be enforced at gateway)"""
|
| 891 |
if not db:
|
| 892 |
return jsonify({"ok": False, "error": "Firebase unavailable"}), 500
|
| 893 |
try:
|
|
@@ -910,14 +1048,14 @@ def health():
|
|
| 910 |
article_count = docs[0][0].value
|
| 911 |
except Exception:
|
| 912 |
pass
|
| 913 |
-
|
| 914 |
return jsonify({
|
| 915 |
-
"ok":
|
| 916 |
-
"service":
|
| 917 |
-
"
|
| 918 |
-
"
|
| 919 |
-
"
|
| 920 |
-
"
|
|
|
|
| 921 |
})
|
| 922 |
|
| 923 |
|
|
@@ -927,5 +1065,5 @@ def health():
|
|
| 927 |
|
| 928 |
if __name__ == "__main__":
|
| 929 |
port = int(os.environ.get("PORT", 7860))
|
| 930 |
-
logger.info("Iris AI Service starting on port %d", port)
|
| 931 |
app.run(host="0.0.0.0", port=port)
|
|
|
|
| 1 |
"""
|
| 2 |
+
main.py — Iris AI Service (v1.1 - April 2026)
|
| 3 |
|
| 4 |
AI layer for the Iris Support Portal (IrisPlus / Unified Spark Desk).
|
| 5 |
Deployed as a HuggingFace Space monofile (Flask + Gemini + AssemblyAI + Firebase).
|
| 6 |
|
| 7 |
+
CHANGELOG v1.1:
|
| 8 |
+
- Model: gemini-3.1-flash-lite-preview (multimodal reasoning)
|
| 9 |
+
- /api/kb/whatsapp-import: now accepts multipart ZIP upload
|
| 10 |
+
* Extracts _chat.txt + maps image files to <Media omitted> pointers
|
| 11 |
+
* Sliding-window chunking (~10k tokens / ~40k chars with overlap)
|
| 12 |
+
* Multimodal: sends images inline with their surrounding text chunk
|
| 13 |
+
* Strict JSON enforcement + pre-save validation
|
| 14 |
+
* JSON parse error recovery (regex extraction fallback)
|
| 15 |
+
- All other endpoints unchanged from v1.0
|
| 16 |
+
|
| 17 |
FEATURES:
|
| 18 |
+
1. WhatsApp Export → Knowledge Base (ZIP multimodal, chunked, additive)
|
| 19 |
2. Bulk KB Upload (CSV / Excel / PDF)
|
| 20 |
+
3. Natural Language + Voice Ticket Submission
|
| 21 |
+
4. System Tutorial Ingestion (timestamped transcripts)
|
| 22 |
+
5. Agent NL/Voice Solution Writing
|
| 23 |
+
6. Iris Chatbot (KB RAG)
|
| 24 |
|
| 25 |
ENV VARS:
|
| 26 |
GOOGLE_API_KEY — Gemini API key
|
| 27 |
ASSEMBLYAI_API_KEY — AssemblyAI API key
|
| 28 |
FIREBASE — JSON string of Firebase service account
|
| 29 |
+
GEMINI_MODEL — Override model (default: gemini-3.1-flash-lite-preview)
|
| 30 |
PORT — Server port (default 7860)
|
| 31 |
"""
|
| 32 |
|
|
|
|
| 38 |
import logging
|
| 39 |
import base64
|
| 40 |
import hashlib
|
| 41 |
+
import zipfile
|
| 42 |
+
import tempfile
|
| 43 |
from datetime import datetime, timezone
|
| 44 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 45 |
|
| 46 |
import requests
|
| 47 |
from flask import Flask, request, jsonify
|
|
|
|
| 65 |
logger.error("google-genai not installed: %s", e)
|
| 66 |
|
| 67 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 68 |
+
# v1.1: upgraded to gemini-3.1-flash-lite-preview for multimodal reasoning
|
| 69 |
+
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-3.1-flash-lite-preview")
|
| 70 |
|
| 71 |
_gemini_client = None
|
| 72 |
if genai and GOOGLE_API_KEY:
|
|
|
|
| 113 |
|
| 114 |
db = init_firestore()
|
| 115 |
|
| 116 |
+
# ─── Optional libs ────────────────────────────────────────────────────────────
|
| 117 |
|
| 118 |
try:
|
| 119 |
import pandas as pd
|
|
|
|
| 133 |
CORS(app)
|
| 134 |
|
| 135 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 136 |
+
# SHARED HELPERS
|
| 137 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 138 |
|
| 139 |
+
# Supported image extensions for multimodal WhatsApp ingestion
|
| 140 |
+
SUPPORTED_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
|
| 141 |
+
|
| 142 |
+
# Approx chars per token (conservative for mixed Shona/English/emoji content)
|
| 143 |
+
CHARS_PER_TOKEN = 4
|
| 144 |
+
# Target ~10k tokens per chunk with ~1k token overlap
|
| 145 |
+
CHUNK_CHARS = 40_000
|
| 146 |
+
OVERLAP_CHARS = 4_000
|
| 147 |
+
|
| 148 |
+
|
| 149 |
def _safe_json(text: str, fallback: Any) -> Any:
|
| 150 |
+
"""
|
| 151 |
+
Multi-strategy JSON parser.
|
| 152 |
+
1. Direct parse after stripping markdown fences.
|
| 153 |
+
2. Regex extraction of first [...] or {...} block.
|
| 154 |
+
3. Return fallback.
|
| 155 |
+
"""
|
| 156 |
+
if not text:
|
| 157 |
+
return fallback
|
| 158 |
+
|
| 159 |
+
# Strategy 1: strip fences
|
| 160 |
+
clean = text.strip()
|
| 161 |
+
for fence in ("```json", "```JSON", "```"):
|
| 162 |
+
if fence in clean:
|
| 163 |
+
parts = clean.split(fence)
|
| 164 |
+
# take the content between the first pair of fences
|
| 165 |
+
if len(parts) >= 3:
|
| 166 |
+
clean = parts[1].strip()
|
| 167 |
+
elif len(parts) == 2:
|
| 168 |
+
clean = parts[1].split("```")[0].strip()
|
| 169 |
+
break
|
| 170 |
+
|
| 171 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
return json.loads(clean)
|
| 173 |
+
except json.JSONDecodeError:
|
| 174 |
+
pass
|
| 175 |
+
|
| 176 |
+
# Strategy 2: regex — find outermost [...] array
|
| 177 |
+
arr_match = re.search(r'\[[\s\S]*\]', clean)
|
| 178 |
+
if arr_match:
|
| 179 |
+
try:
|
| 180 |
+
return json.loads(arr_match.group())
|
| 181 |
+
except json.JSONDecodeError:
|
| 182 |
+
pass
|
| 183 |
+
|
| 184 |
+
# Strategy 3: regex — find outermost {...} object
|
| 185 |
+
obj_match = re.search(r'\{[\s\S]*\}', clean)
|
| 186 |
+
if obj_match:
|
| 187 |
+
try:
|
| 188 |
+
return json.loads(obj_match.group())
|
| 189 |
+
except json.JSONDecodeError:
|
| 190 |
+
pass
|
| 191 |
+
|
| 192 |
+
logger.error("JSON parse exhausted all strategies. First 300 chars: %s", text[:300])
|
| 193 |
+
return fallback
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def _validate_articles(data: Any) -> List[Dict]:
|
| 197 |
+
"""
|
| 198 |
+
Validate that extracted articles are a list of dicts with required fields.
|
| 199 |
+
Filters out malformed items rather than failing the whole batch.
|
| 200 |
+
"""
|
| 201 |
+
if not isinstance(data, list):
|
| 202 |
+
logger.warning("Expected list from Gemini, got %s", type(data))
|
| 203 |
+
return []
|
| 204 |
+
valid = []
|
| 205 |
+
for item in data:
|
| 206 |
+
if not isinstance(item, dict):
|
| 207 |
+
continue
|
| 208 |
+
title = str(item.get("title", "")).strip()
|
| 209 |
+
content = str(item.get("content", "")).strip()
|
| 210 |
+
if len(title) < 3 or len(content) < 10:
|
| 211 |
+
continue
|
| 212 |
+
valid.append({
|
| 213 |
+
"title": title,
|
| 214 |
+
"content": content,
|
| 215 |
+
"category": str(item.get("category", "General")).strip() or "General",
|
| 216 |
+
"tags": item.get("tags", []) if isinstance(item.get("tags"), list) else [],
|
| 217 |
+
})
|
| 218 |
+
return valid
|
| 219 |
|
| 220 |
|
| 221 |
def _gemini_text(prompt: str, json_mode: bool = False) -> str:
|
| 222 |
+
"""Call Gemini with text-only content."""
|
| 223 |
if not _gemini_client:
|
| 224 |
return ""
|
| 225 |
+
cfg = genai_types.GenerateContentConfig(
|
| 226 |
+
response_mime_type="application/json"
|
| 227 |
+
) if json_mode else None
|
| 228 |
try:
|
| 229 |
resp = _gemini_client.models.generate_content(
|
| 230 |
model=GEMINI_MODEL,
|
|
|
|
| 233 |
)
|
| 234 |
return resp.text or ""
|
| 235 |
except Exception as e:
|
| 236 |
+
logger.error("Gemini text call error: %s", e)
|
| 237 |
+
return ""
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def _gemini_multimodal(parts: list, json_mode: bool = False) -> str:
    """
    Send a mixed list of text strings and image Parts to Gemini.

    Returns the response text, or "" when no client is configured, the
    response has no text, or the call raises (the error is logged).
    When ``json_mode`` is true the request asks for an
    ``application/json`` response.
    """
    if not _gemini_client:
        return ""

    request_config = None
    if json_mode:
        request_config = genai_types.GenerateContentConfig(
            response_mime_type="application/json"
        )

    try:
        response = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=parts,
            config=request_config,
        )
    except Exception as e:
        logger.error("Gemini multimodal call error: %s", e)
        return ""
    return response.text or ""
|
| 257 |
|
| 258 |
|
| 259 |
def _article_fingerprint(title: str, content: str) -> str:
|
|
|
|
| 260 |
raw = f"{title.strip().lower()}::{content.strip().lower()[:300]}"
|
| 261 |
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
| 262 |
|
| 263 |
|
| 264 |
def _get_existing_fingerprints() -> set:
|
|
|
|
| 265 |
if not db:
|
| 266 |
return set()
|
| 267 |
try:
|
|
|
|
| 273 |
|
| 274 |
|
| 275 |
def _save_kb_articles(articles: List[Dict], source_label: str) -> Dict:
|
|
|
|
| 276 |
if not db:
|
| 277 |
return {"saved": 0, "skipped": 0, "error": "Firebase unavailable"}
|
|
|
|
| 278 |
existing = _get_existing_fingerprints()
|
| 279 |
saved, skipped = 0, 0
|
|
|
|
| 280 |
for article in articles:
|
| 281 |
title = article.get("title", "Untitled")
|
| 282 |
content = article.get("content", "")
|
| 283 |
fp = _article_fingerprint(title, content)
|
|
|
|
| 284 |
if fp in existing:
|
| 285 |
skipped += 1
|
| 286 |
continue
|
|
|
|
| 287 |
doc = {
|
| 288 |
"title": title,
|
| 289 |
"content": content,
|
|
|
|
| 293 |
"fingerprint": fp,
|
| 294 |
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 295 |
}
|
|
|
|
| 296 |
if article.get("timestamp_start") is not None:
|
| 297 |
doc["timestamp_start"] = article["timestamp_start"]
|
| 298 |
doc["timestamp_end"] = article.get("timestamp_end")
|
| 299 |
doc["video_url"] = article.get("video_url", "")
|
|
|
|
| 300 |
db.collection("iris_kb_articles").add(doc)
|
| 301 |
existing.add(fp)
|
| 302 |
saved += 1
|
|
|
|
| 303 |
return {"saved": saved, "skipped": skipped}
|
| 304 |
|
| 305 |
|
| 306 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 307 |
+
# WHATSAPP ZIP PROCESSOR
|
| 308 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 309 |
+
|
| 310 |
+
# Regex to match WhatsApp timestamp lines
|
| 311 |
+
# Handles both: DD/MM/YYYY, HH:MM - Sender: message
|
| 312 |
+
# and: DD/MM/YYYY, HH:MM am/pm - Sender: message
|
| 313 |
+
WA_LINE_RE = re.compile(
|
| 314 |
+
r'^\d{1,2}/\d{1,2}/\d{4},\s+\d{1,2}:\d{2}(?:\s*[ap]m)?\s+-\s+',
|
| 315 |
+
re.IGNORECASE
|
| 316 |
+
)
|
| 317 |
+
|
| 318 |
+
# Matches <Media omitted> or [filename.jpg] style media pointers
|
| 319 |
+
MEDIA_POINTER_RE = re.compile(
|
| 320 |
+
r'<Media omitted>|\[?([^\]]+\.(?:jpg|jpeg|png|webp|gif|mp4|opus|aac|m4a))\]?',
|
| 321 |
+
re.IGNORECASE
|
| 322 |
+
)
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
class WhatsAppZipProcessor:
    """
    Handles extraction and multimodal chunking of a WhatsApp .zip export.

    A WhatsApp export zip typically contains:
      _chat.txt          — the full conversation
      IMG-YYYYMMDD-*.jpg — attached images
      VID-*.mp4          — videos (we skip these, too large)
      PTT-*.opus         — voice notes (skipped)

    Usage: construct with the raw ZIP bytes, call ``extract()``, then
    ``build_chunks()``. ``chat_text`` / ``media_map`` may also be assigned
    directly to chunk plain text without a ZIP.
    """

    # Upper bound on images attached to any single chunk.
    MAX_IMAGES_PER_CHUNK = 5

    # A filename-like token: no whitespace, brackets, colons, slashes or
    # directional marks, ending in a short extension. Candidates are only
    # accepted if they are keys of ``media_map``, so over-matching is harmless.
    # NOTE(review): exports appear to prefix attachment names with U+200E;
    # it is excluded from the token for that reason — confirm on real exports.
    _FILENAME_TOKEN_RE = re.compile(
        r'[^\s<>\[\]():/\\\u200e\u200f]+\.[A-Za-z0-9]{2,5}'
    )

    def __init__(self, zip_bytes: bytes):
        self.zip_bytes = zip_bytes
        self.chat_text = ""
        self.media_map: Dict[str, bytes] = {}  # filename -> raw bytes

    @staticmethod
    def _find_chat_file(names: List[str]) -> Optional[str]:
        """Pick the chat transcript entry: a '*chat*.txt' name, else any .txt."""
        for name in names:
            base = os.path.basename(name).lower()
            if base == "_chat.txt" or (base.endswith(".txt") and "chat" in base):
                return name
        for name in names:
            if name.lower().endswith(".txt"):
                return name
        return None

    def extract(self) -> bool:
        """Extract chat text and image files from ZIP. Returns True on success."""
        try:
            with zipfile.ZipFile(io.BytesIO(self.zip_bytes)) as zf:
                names = zf.namelist()
                logger.info("ZIP contains %d files: %s", len(names), names[:20])

                chat_file = self._find_chat_file(names)
                if not chat_file:
                    logger.error("No chat .txt found in ZIP")
                    return False

                # errors="replace" keeps a partially mis-encoded export usable.
                self.chat_text = zf.read(chat_file).decode("utf-8", errors="replace")
                logger.info("Chat text extracted: %d chars from %s", len(self.chat_text), chat_file)

                # Extract images (skip videos and audio — too large / not useful for KB)
                for name in names:
                    ext = os.path.splitext(name.lower())[1]
                    if ext not in SUPPORTED_IMAGE_EXTS:
                        continue
                    try:
                        self.media_map[os.path.basename(name)] = zf.read(name)
                    except Exception as e:
                        # One unreadable image must not abort the whole import.
                        logger.warning("Could not read media file %s: %s", name, e)

                logger.info("Media files extracted: %d images", len(self.media_map))
                return True

        except zipfile.BadZipFile as e:
            logger.error("Bad ZIP file: %s", e)
            return False
        except Exception as e:
            logger.error("ZIP extraction error: %s", e)
            return False

    def _resolve_media_in_line(self, line: str) -> Optional[bytes]:
        """
        Return the bytes of the first extracted image named in ``line``.

        Each filename-like token in the line is looked up in ``media_map``;
        the sender/date prefix is never part of the lookup key. Lines such as
        ``<Media omitted>`` name no file and therefore yield None.
        """
        if not self.media_map:
            return None
        for token in self._FILENAME_TOKEN_RE.finditer(line):
            data = self.media_map.get(token.group())
            if data is not None:
                return data
        return None

    def _attach_image(self, line: str, images: List[bytes]) -> None:
        """Append the image referenced by ``line`` (if any) unless the cap is hit."""
        if len(images) >= self.MAX_IMAGES_PER_CHUNK:
            return
        img_bytes = self._resolve_media_in_line(line)
        if img_bytes:
            images.append(img_bytes)

    def build_chunks(self) -> List[Dict]:
        """
        Split chat text into overlapping chunks, each annotated with
        the image bytes found within that chunk.

        A chunk is closed once it reaches CHUNK_CHARS characters or the last
        line is consumed. The next chunk starts with the trailing lines of
        the previous one, up to OVERLAP_CHARS characters, so exchanges that
        straddle a boundary stay together.

        Returns list of:
          { "text": str, "images": [bytes, ...], "line_range": (start, end) }
        """
        lines = self.chat_text.splitlines()
        last_index = len(lines) - 1
        chunks: List[Dict] = []

        chunk_lines: List[str] = []
        chunk_images: List[bytes] = []
        char_count = 0

        for i, line in enumerate(lines):
            chunk_lines.append(line)
            char_count += len(line) + 1  # +1 for newline
            self._attach_image(line, chunk_images)

            if char_count < CHUNK_CHARS and i != last_index:
                continue

            chunks.append({
                "text": "\n".join(chunk_lines),
                "images": chunk_images[:],
                "line_range": (i - len(chunk_lines) + 1, i)
            })
            logger.info(
                "Chunk %d: %d lines, %d chars, %d images",
                len(chunks), len(chunk_lines), char_count, len(chunk_images)
            )

            # Overlap: walk back from the end while the budget allows. The
            # chunk's first line is never carried over (keep_from stays >= 1),
            # so an oversized line cannot be repeated in every later chunk.
            keep_from = len(chunk_lines)
            overlap_chars = 0
            while keep_from > 1:
                cost = len(chunk_lines[keep_from - 1]) + 1
                if overlap_chars + cost > OVERLAP_CHARS:
                    break
                overlap_chars += cost
                keep_from -= 1

            chunk_lines = chunk_lines[keep_from:]
            char_count = overlap_chars
            # Re-attach images referenced by the carried-over lines so the
            # next chunk's images match its text.
            chunk_images = []
            for kept_line in chunk_lines:
                self._attach_image(kept_line, chunk_images)

        logger.info("Total chunks: %d", len(chunks))
        return chunks
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 457 |
+
# WHATSAPP EXTRACTION PROMPT
|
| 458 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 459 |
|
| 460 |
+
WHATSAPP_EXTRACTION_PROMPT = """You are a support knowledge base curator for the Iris field staff management app used in Zimbabwe.
|
| 461 |
+
|
| 462 |
+
Your task: analyse this WhatsApp support group chat segment and extract ONLY clear problem→solution pairs.
|
| 463 |
|
| 464 |
+
CONTEXT ABOUT THIS APP:
|
| 465 |
+
- "Iris" is a mobile attendance/location tracking app used by field sales reps at retail stores.
|
| 466 |
+
- Common issues: GPS location not detected, clock-in failures, app killed by Android battery optimiser,
|
| 467 |
+
teller passkey problems, hours not recording correctly, store radius too small, wrong teller name shown.
|
| 468 |
+
- Messages mix English, Shona, and Ndebele. Understand regional vernacular (e.g. "irikudzima" = switching off,
|
| 469 |
+
"ndakashanda" = I worked, "short yemahours" = hours shortage, "gadzirisayi" = fix it through).
|
| 470 |
+
- If screenshots show Android error dialogs (e.g. "Service killed by system", "App stopped"), reason through
|
| 471 |
+
what that means for Android background restriction and include it in the solution.
|
| 472 |
|
| 473 |
+
STRICT RULES:
|
| 474 |
+
1. Extract ONLY exchanges where a user described a problem AND a named support person (Tendayi, Tony, Violet,
|
| 475 |
+
Rufaro, Albrighton, Ishmael, or any named responder) provided a working solution or clear instruction.
|
| 476 |
+
2. Ignore: greetings, media-only messages, deleted messages, clock-in screenshots with no text context,
|
| 477 |
+
messages from unknown numbers with no solution attached.
|
| 478 |
+
3. Each article must be self-contained and usable by a support agent in future.
|
| 479 |
+
4. Translate all Shona/Ndebele problem descriptions to English in the article content.
|
| 480 |
+
5. If a screenshot appears to show an Android error or GPS issue, reason through the likely cause and
|
| 481 |
+
include that reasoning in the solution content.
|
| 482 |
|
| 483 |
+
OUTPUT FORMAT: Return ONLY a valid JSON array. No preamble, no explanation, no markdown fences.
|
| 484 |
+
Every string value MUST be properly JSON-escaped. Do not use unescaped newlines, tabs, or quotes inside strings.
|
| 485 |
+
Use \\n for line breaks within content strings.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
|
| 487 |
+
Schema per item:
|
| 488 |
+
{"title": "string (max 80 chars)", "content": "string (escaped, solution steps)", "category": "one of: Account|Technical|Location|Attendance|Device|Other", "tags": ["array", "of", "strings"]}
|
| 489 |
|
| 490 |
+
If no valid problem→solution pairs exist in this segment, return an empty array: []
|
| 491 |
+
|
| 492 |
+
Chat segment:
|
| 493 |
"""
|
| 494 |
|
| 495 |
+
|
| 496 |
+
def _sniff_image_mime(img_bytes: bytes) -> str:
    """Guess an image MIME type from its magic bytes; defaults to JPEG."""
    if img_bytes[:4] == b'\x89PNG':
        return "image/png"
    # RIFF is a generic container; only RIFF....WEBP is actually WebP.
    if img_bytes[:4] == b'RIFF' and img_bytes[8:12] == b'WEBP':
        return "image/webp"
    if img_bytes[:6] in (b'GIF87a', b'GIF89a'):
        return "image/gif"
    return "image/jpeg"


def _process_chunk_with_gemini(chunk: Dict) -> List[Dict]:
    """
    Send a single chunk (text + optional images) to Gemini.

    ``chunk`` is a dict with a "text" string and an optional "images" list
    of raw image bytes. When images are present and a Gemini client exists
    they are sent inline after the prompt; otherwise a text-only call is made.

    Returns validated list of article dicts (empty when the model returns
    nothing usable).
    """
    text_part = WHATSAPP_EXTRACTION_PROMPT + chunk["text"]
    images = chunk.get("images", [])

    if images and _gemini_client:
        # Build multimodal content list: prompt first, then each image
        # labelled with the MIME type detected from its own bytes.
        parts = [text_part]
        parts.extend(
            genai_types.Part.from_bytes(data=img_bytes, mime_type=_sniff_image_mime(img_bytes))
            for img_bytes in images
        )
        raw = _gemini_multimodal(parts, json_mode=True)
    else:
        raw = _gemini_text(text_part, json_mode=True)

    if not raw:
        logger.warning("Empty Gemini response for chunk")
        return []

    return _validate_articles(_safe_json(raw, []))
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 530 |
+
# FEATURE 1 — WhatsApp Export → Knowledge Base (v1.1: ZIP multimodal + chunked)
|
| 531 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 532 |
+
|
| 533 |
@app.post("/api/kb/whatsapp-import")
def whatsapp_import():
    """
    Accepts EITHER:
      (a) multipart file upload with field "file" containing a .zip WhatsApp export, OR
      (b) JSON body { "chat_text": "..." } for plain text (legacy support)

    Processes in sliding-window chunks, sends images to Gemini multimodally.
    Saves new articles only (additive, dedup by fingerprint).

    Responses: 400 for missing/invalid/too-short input, 503 when the
    knowledge base store reports an error, otherwise 200 with
    ``articles_found`` / ``saved`` / ``skipped_dupes``.
    """
    source_label = "whatsapp_export"

    # ── Branch A: ZIP upload ──────────────────────────────────────────────────
    if "file" in request.files:
        upload = request.files["file"]
        filename = upload.filename or ""
        if not filename.lower().endswith(".zip"):
            return jsonify({"ok": False, "error": "Expected a .zip WhatsApp export file"}), 400

        zip_bytes = upload.read()
        logger.info("WhatsApp ZIP upload: %d bytes, filename=%s", len(zip_bytes), filename)

        processor = WhatsAppZipProcessor(zip_bytes)
        if not processor.extract():
            return jsonify({"ok": False, "error": "Could not extract chat from ZIP. Ensure it is a valid WhatsApp export."}), 400

        if len(processor.chat_text) < 100:
            return jsonify({"ok": False, "error": "Extracted chat text too short to process"}), 400

        source_label = f"whatsapp_zip:{filename}"
        chunk_kind = "chunk"

    # ── Branch B: Legacy plain text JSON body ─────────────────────────────────
    else:
        body = request.get_json(silent=True)
        raw_chat = body.get("chat_text") if isinstance(body, dict) else None
        # A non-string chat_text (null, number, list) is treated as missing
        # instead of crashing on .strip().
        raw_chat = raw_chat.strip() if isinstance(raw_chat, str) else ""
        if not raw_chat:
            return jsonify({"ok": False, "error": "Provide a .zip file upload or chat_text in JSON body"}), 400
        if len(raw_chat) < 100:
            return jsonify({"ok": False, "error": "Chat text too short to process"}), 400

        logger.info("WhatsApp plain text import: %d chars", len(raw_chat))

        # Reuse the ZIP processor purely for its chunking (no media available).
        processor = WhatsAppZipProcessor(b"")
        processor.chat_text = raw_chat
        processor.media_map = {}
        chunk_kind = "text chunk"

    # ── Shared: chunk → Gemini → collect ──────────────────────────────────────
    all_articles: List[Dict] = []
    chunks = processor.build_chunks()
    for idx, chunk in enumerate(chunks, start=1):
        logger.info("Processing %s %d/%d", chunk_kind, idx, len(chunks))
        articles = _process_chunk_with_gemini(chunk)
        all_articles.extend(articles)
        logger.info("Chunk %d yielded %d articles (running total: %d)", idx, len(articles), len(all_articles))

    if not all_articles:
        logger.info("No articles extracted from this export")
        return jsonify({
            "ok": True,
            "articles_found": 0,
            "saved": 0,
            "skipped_dupes": 0,
            "note": "No clear problem→solution pairs found in this chat segment"
        })

    stats = _save_kb_articles(all_articles, source_label=source_label)
    logger.info("WhatsApp import complete: found=%d, %s", len(all_articles), stats)

    # The save helper signals an unavailable store via an "error" key;
    # reporting ok=True in that case would hide that nothing was persisted.
    if stats.get("error"):
        return jsonify({
            "ok": False,
            "error": stats["error"],
            "articles_found": len(all_articles),
            "saved": stats.get("saved", 0),
            "skipped_dupes": stats.get("skipped", 0),
        }), 503

    return jsonify({
        "ok": True,
        "articles_found": len(all_articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
| 618 |
|
| 619 |
|
|
|
|
| 622 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 623 |
|
| 624 |
def _extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
|
|
|
|
| 625 |
if PYPDF_AVAILABLE:
|
| 626 |
try:
|
| 627 |
reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
|
|
|
|
| 631 |
return text
|
| 632 |
except Exception as e:
|
| 633 |
logger.warning("pypdf extraction failed: %s", e)
|
|
|
|
|
|
|
| 634 |
if _gemini_client:
|
| 635 |
try:
|
| 636 |
+
resp = _gemini_client.models.generate_content(
|
|
|
|
| 637 |
model=GEMINI_MODEL,
|
| 638 |
contents=[
|
| 639 |
"Extract all text from this PDF document. Return plain text only.",
|
|
|
|
| 646 |
return ""
|
| 647 |
|
| 648 |
|
| 649 |
+
PDF_KB_PROMPT = """You are a support knowledge base curator.
|
|
|
|
| 650 |
Convert the following document content into structured KB articles.
|
| 651 |
+
Each article covers one distinct topic, issue, or procedure.
|
| 652 |
|
| 653 |
+
Return ONLY a valid JSON array — no preamble, no markdown fences.
|
| 654 |
+
All string values must be properly JSON-escaped (no raw newlines inside strings, use \\n).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
|
| 656 |
+
Schema per item:
|
| 657 |
+
{"title": "string", "content": "string", "category": "one of: Account|Billing|Technical|Feature|Other", "tags": ["string"]}
|
| 658 |
|
| 659 |
Document content:
|
| 660 |
"""
|
| 661 |
|
| 662 |
@app.post("/api/kb/bulk-upload")
|
| 663 |
def bulk_upload():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 664 |
if "file" not in request.files:
|
| 665 |
return jsonify({"ok": False, "error": "No file uploaded"}), 400
|
|
|
|
| 666 |
f = request.files["file"]
|
| 667 |
filename = f.filename or ""
|
| 668 |
ext = filename.rsplit(".", 1)[-1].lower()
|
| 669 |
file_data = f.read()
|
| 670 |
+
articles = []
|
|
|
|
| 671 |
|
| 672 |
if ext in ("csv", "xlsx", "xls"):
|
| 673 |
if not PANDAS_AVAILABLE:
|
| 674 |
return jsonify({"ok": False, "error": "pandas not installed on server"}), 500
|
| 675 |
try:
|
| 676 |
+
df = pd.read_csv(io.BytesIO(file_data)) if ext == "csv" else pd.read_excel(io.BytesIO(file_data))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
df.columns = [c.strip().lower() for c in df.columns]
|
|
|
|
| 678 |
if "title" not in df.columns or "content" not in df.columns:
|
| 679 |
return jsonify({"ok": False, "error": "CSV/Excel must have 'title' and 'content' columns"}), 400
|
|
|
|
| 680 |
for _, row in df.iterrows():
|
| 681 |
tags = []
|
| 682 |
if "tags" in df.columns and pd.notna(row.get("tags")):
|
| 683 |
+
tags = [t.strip() for t in re.split(r"[,;|]", str(row["tags"])) if t.strip()]
|
|
|
|
|
|
|
| 684 |
articles.append({
|
| 685 |
"title": str(row["title"]).strip(),
|
| 686 |
"content": str(row["content"]).strip(),
|
|
|
|
| 688 |
"tags": tags,
|
| 689 |
})
|
| 690 |
except Exception as e:
|
|
|
|
| 691 |
return jsonify({"ok": False, "error": f"Could not parse file: {e}"}), 400
|
| 692 |
|
| 693 |
elif ext == "pdf":
|
| 694 |
text = _extract_text_from_pdf_bytes(file_data)
|
| 695 |
if not text:
|
| 696 |
return jsonify({"ok": False, "error": "Could not extract text from PDF"}), 400
|
| 697 |
+
raw = _gemini_text(PDF_KB_PROMPT + text[:50000], json_mode=True)
|
| 698 |
+
parsed = _safe_json(raw, [])
|
| 699 |
+
articles = _validate_articles(parsed)
|
| 700 |
+
if not articles:
|
| 701 |
+
return jsonify({"ok": False, "error": "Gemini PDF structuring returned no valid articles"}), 500
|
|
|
|
| 702 |
else:
|
| 703 |
+
return jsonify({"ok": False, "error": f"Unsupported file type .{ext}. Use csv, xlsx, or pdf"}), 400
|
| 704 |
|
| 705 |
if not articles:
|
| 706 |
return jsonify({"ok": False, "error": "No articles extracted from file"}), 400
|
| 707 |
|
| 708 |
stats = _save_kb_articles(articles, source_label=f"bulk_upload:{filename}")
|
| 709 |
+
return jsonify({"ok": True, "articles_found": len(articles), "saved": stats["saved"], "skipped_dupes": stats["skipped"]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
|
| 711 |
|
| 712 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 713 |
# FEATURE 3 — Ticket Submission via NL Text or Voice
|
| 714 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 715 |
|
| 716 |
+
TICKET_EXTRACTION_PROMPT = """You are a support ticket intake system for a software support portal.
|
|
|
|
| 717 |
|
| 718 |
A user has described their issue in natural language. Extract structured ticket fields.
|
| 719 |
|
| 720 |
+
Return ONLY a valid JSON object — no preamble, no markdown fences.
|
| 721 |
+
All string values must be properly JSON-escaped.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 722 |
|
| 723 |
+
Schema:
|
| 724 |
+
{"title": "string (max 80 chars)", "description": "string (full clear description)", "category_hint": "one of: Account|Billing|Technical|Feature|Other", "priority_hint": "one of: low|medium|high|critical", "keywords": ["string"]}
|
| 725 |
+
|
| 726 |
+
User message:
|
| 727 |
"""
|
| 728 |
|
| 729 |
def _transcribe_audio_assemblyai(audio_b64: str, audio_format: str = "wav") -> str:
|
|
|
|
| 730 |
if not ASSEMBLYAI_API_KEY:
|
| 731 |
return ""
|
|
|
|
| 732 |
audio_bytes = base64.b64decode(audio_b64)
|
| 733 |
headers = {"authorization": ASSEMBLYAI_API_KEY}
|
|
|
|
|
|
|
| 734 |
try:
|
| 735 |
upload_resp = requests.post(
|
| 736 |
f"{ASSEMBLYAI_BASE}/upload",
|
| 737 |
headers={**headers, "Content-Type": "application/octet-stream"},
|
| 738 |
+
data=audio_bytes, timeout=30
|
|
|
|
| 739 |
)
|
| 740 |
upload_resp.raise_for_status()
|
| 741 |
upload_url = upload_resp.json().get("upload_url")
|
| 742 |
except Exception as e:
|
| 743 |
logger.error("AssemblyAI upload error: %s", e)
|
| 744 |
return ""
|
|
|
|
|
|
|
| 745 |
try:
|
| 746 |
tx_resp = requests.post(
|
| 747 |
f"{ASSEMBLYAI_BASE}/transcript",
|
| 748 |
headers={**headers, "Content-Type": "application/json"},
|
| 749 |
+
json={"audio_url": upload_url, "language_detection": True}, timeout=15
|
|
|
|
| 750 |
)
|
| 751 |
tx_resp.raise_for_status()
|
| 752 |
tx_id = tx_resp.json().get("id")
|
| 753 |
except Exception as e:
|
| 754 |
logger.error("AssemblyAI transcript request error: %s", e)
|
| 755 |
return ""
|
|
|
|
|
|
|
| 756 |
for _ in range(30):
|
| 757 |
time.sleep(3)
|
| 758 |
try:
|
| 759 |
+
poll = requests.get(f"{ASSEMBLYAI_BASE}/transcript/{tx_id}", headers=headers, timeout=15)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
poll.raise_for_status()
|
| 761 |
result = poll.json()
|
| 762 |
status = result.get("status")
|
|
|
|
| 772 |
|
| 773 |
@app.post("/api/tickets/submit-nl")
def submit_ticket_nl():
    """
    Turn a free-text issue description into structured ticket fields.

    Expects JSON ``{"message": str, "user_id": str (optional)}``.
    Returns 400 when ``message`` is missing or blank, 500 when no ticket with
    a title could be extracted, otherwise ``{"ok": True, "ticket": {...}}``.
    When Firestore is available the draft is also recorded in
    ``iris_ai_ticket_drafts``.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        body = {}
    message = body.get("message", "")
    # Non-string payloads (null, numbers, lists) count as "no message".
    message = message.strip() if isinstance(message, str) else ""
    user_id = body.get("user_id", "anonymous")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    raw = _gemini_text(TICKET_EXTRACTION_PROMPT + message, json_mode=True)
    ticket = _safe_json(raw, {})
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from message"}), 500

    if db:
        try:
            db.collection("iris_ai_ticket_drafts").add({
                "user_id": user_id, "raw_input": message,
                "extracted": ticket, "channel": "nl_text",
                "created_at": datetime.now(timezone.utc).isoformat(),
            })
        except Exception as e:
            # The draft log is a side record; the extracted ticket is still
            # returned to the caller if the write fails.
            logger.error("Could not store ticket draft: %s", e)

    return jsonify({"ok": True, "ticket": ticket})
|
| 791 |
|
| 792 |
|
| 793 |
@app.post("/api/tickets/submit-voice")
def submit_ticket_voice():
    """
    Transcribe a voice note and turn it into structured ticket fields.

    Expects JSON ``{"audio_b64": str, "audio_format": str (default "wav"),
    "user_id": str (optional)}``.
    Returns 400 for missing or undecodable audio, 500 when AssemblyAI is not
    configured, transcription yields nothing, or no ticket with a title could
    be extracted; otherwise ``{"ok": True, "transcript": ..., "ticket": ...}``.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        body = {}
    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    user_id = body.get("user_id", "anonymous")

    if not audio_b64 or not isinstance(audio_b64, str):
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400
    if not ASSEMBLYAI_API_KEY:
        return jsonify({"ok": False, "error": "AssemblyAI not configured on server"}), 500

    try:
        transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)
    except (ValueError, TypeError) as e:
        # The helper base64-decodes before its own try blocks, so malformed
        # input surfaces here (binascii.Error is a ValueError).
        logger.warning("Invalid audio_b64 payload: %s", e)
        return jsonify({"ok": False, "error": "audio_b64 is not valid base64"}), 400

    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed or returned empty result"}), 500

    raw = _gemini_text(TICKET_EXTRACTION_PROMPT + transcript, json_mode=True)
    ticket = _safe_json(raw, {})
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from transcript"}), 500

    if db:
        try:
            db.collection("iris_ai_ticket_drafts").add({
                "user_id": user_id, "raw_input": transcript,
                "extracted": ticket, "channel": "voice",
                "created_at": datetime.now(timezone.utc).isoformat(),
            })
        except Exception as e:
            # The draft log is a side record; transcript and ticket are still
            # returned to the caller if the write fails.
            logger.error("Could not store ticket draft: %s", e)

    return jsonify({"ok": True, "transcript": transcript, "ticket": ticket})
|
| 817 |
|
| 818 |
|
|
|
|
| 820 |
# FEATURE 4 — System Tutorial Ingestion
|
| 821 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 822 |
|
| 823 |
+
TUTORIAL_EXTRACTION_PROMPT = """You are a knowledge base curator for a software support system.
|
|
|
|
| 824 |
|
| 825 |
+
You have a timestamped transcript from a tutorial video about the Iris Support Portal.
|
| 826 |
+
Extract one KB article per distinct feature or task demonstrated.
|
| 827 |
|
| 828 |
+
Return ONLY a valid JSON array — no preamble, no markdown fences.
|
| 829 |
+
All strings must be properly JSON-escaped.
|
| 830 |
|
| 831 |
+
Schema per item:
|
| 832 |
+
{"title": "string", "content": "string (step-by-step instructions)", "category": "one of: Account|Tickets|Agents|Reports|Admin|Other", "tags": ["string"], "timestamp_start": <integer seconds>, "timestamp_end": <integer seconds>}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
|
| 834 |
+
Transcript:
|
|
|
|
|
|
|
| 835 |
"""
|
| 836 |
|
| 837 |
def _parse_timestamp_to_seconds(ts: str) -> int:
|
| 838 |
+
parts = str(ts).strip("[]").split(":")
|
|
|
|
| 839 |
try:
|
| 840 |
if len(parts) == 2:
|
| 841 |
return int(parts[0]) * 60 + int(parts[1])
|
|
|
|
| 845 |
pass
|
| 846 |
return 0
|
| 847 |
|
|
|
|
| 848 |
@app.post("/api/kb/tutorial-ingest")
def tutorial_ingest():
    """Extract KB articles from a tutorial transcript and persist them.

    Request JSON:
        transcript  -- required, non-empty string (only the first 50,000
                       characters are sent to the model).
        video_url   -- optional, stored on every extracted article.
        video_title -- optional, defaults to "Tutorial"; also used in the
                       ``tutorial:<title>`` source label.

    Responses:
        400 -- ``transcript`` missing, empty, or not a string.
        500 -- the model output produced no valid articles.
        200 -- ``{"ok": True, "video_title", "articles_found", "saved",
               "skipped_dupes"}``.
    """
    body = request.get_json(silent=True)
    # get_json may return any JSON type (list, string, ...); only an object
    # can carry the fields read below.
    if not isinstance(body, dict):
        body = {}

    raw_transcript = body.get("transcript")
    # A null / numeric transcript used to raise AttributeError on .strip().
    transcript = raw_transcript.strip() if isinstance(raw_transcript, str) else ""
    video_url = body.get("video_url", "")
    video_title = body.get("video_title", "Tutorial")

    if not transcript:
        return jsonify({"ok": False, "error": "transcript is required"}), 400

    raw = _gemini_text(TUTORIAL_EXTRACTION_PROMPT + transcript[:50000], json_mode=True)
    parsed = _safe_json(raw, [])
    articles = _validate_articles(parsed) if isinstance(parsed, list) else []
    if not articles:
        return jsonify({"ok": False, "error": "Gemini returned no valid articles from transcript"}), 500

    for a in articles:
        a["video_url"] = video_url
        a["video_title"] = video_title

        # Normalise timestamps to plain ints: downstream formatting uses
        # integer-only format codes, so floats/bools/None must not survive.
        for ts_key in ("timestamp_start", "timestamp_end"):
            val = a.get(ts_key)
            if isinstance(val, str):
                a[ts_key] = _parse_timestamp_to_seconds(val)
            elif isinstance(val, float):
                # Models sometimes emit 12.0 instead of 12; keep the value
                # rather than discarding it. NaN/inf cannot be converted.
                try:
                    a[ts_key] = int(val)
                except (ValueError, OverflowError):
                    a[ts_key] = 0
            elif isinstance(val, bool) or not isinstance(val, int):
                # bool is a subclass of int but is never a valid timestamp.
                a[ts_key] = 0

    stats = _save_kb_articles(articles, source_label=f"tutorial:{video_title}")
    return jsonify({"ok": True, "video_title": video_title, "articles_found": len(articles), "saved": stats["saved"], "skipped_dupes": stats["skipped"]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 872 |
|
| 873 |
|
| 874 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 875 |
# FEATURE 5 — Agent Solution Writing (NL Text + Voice)
|
| 876 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 877 |
|
| 878 |
+
SOLUTION_EXTRACTION_PROMPT = """You are a support knowledge base curator.
|
| 879 |
+
An agent has described a solution they used to resolve a ticket.
|
|
|
|
|
|
|
| 880 |
Structure this into a reusable KB article.
|
| 881 |
|
| 882 |
+
Return ONLY a valid JSON object — no preamble, no markdown fences.
|
| 883 |
+
All strings must be properly JSON-escaped.
|
| 884 |
+
|
| 885 |
+
Schema:
|
| 886 |
+
{"title": "string", "content": "string (clear step-by-step solution)", "category": "one of: Account|Billing|Technical|Feature|Other", "tags": ["string"]}
|
|
|
|
|
|
|
| 887 |
|
| 888 |
+
Agent description:
|
| 889 |
"""
|
| 890 |
|
| 891 |
@app.post("/api/kb/agent-solution-nl")
def agent_solution_nl():
    """Structure an agent's typed solution into a KB article and save it.

    Request JSON:
        message   -- required, non-empty string describing the solution.
        agent_id  -- optional, defaults to "unknown"; used in the
                     ``agent:<id>`` source label.
        ticket_id -- optional; when present a ``ticket:<id>`` tag is added.

    Responses:
        400 -- ``message`` missing, empty, or not a string.
        500 -- the model output had no usable ``title``.
        200 -- ``{"ok": True, "saved", "article"}``.
    """
    body = request.get_json(silent=True)
    # Only a JSON object can carry the fields read below.
    if not isinstance(body, dict):
        body = {}

    raw_message = body.get("message")
    # A null / numeric message used to raise AttributeError on .strip().
    message = raw_message.strip() if isinstance(raw_message, str) else ""
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    raw = _gemini_text(SOLUTION_EXTRACTION_PROMPT + message, json_mode=True)
    article = _safe_json(raw, {})
    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution"}), 500

    if ticket_id:
        # The model may return "tags": null or a bare string; setdefault()
        # would hand that back unchanged and .append() would then fail.
        tags = article.get("tags")
        if not isinstance(tags, list):
            tags = [tags] if isinstance(tags, str) and tags else []
        tags.append(f"ticket:{ticket_id}")
        article["tags"] = tags

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")
    return jsonify({"ok": True, "saved": stats["saved"], "article": article})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 907 |
|
| 908 |
|
| 909 |
@app.post("/api/kb/agent-solution-voice")
def agent_solution_voice():
    """Transcribe an agent's spoken solution, structure it, and save it.

    Request JSON:
        audio_b64    -- required audio payload passed to the transcriber.
        audio_format -- optional, defaults to "wav".
        agent_id     -- optional, defaults to "unknown"; used in the
                        ``agent:<id>`` source label.
        ticket_id    -- optional; when present a ``ticket:<id>`` tag is added.

    Responses:
        400 -- ``audio_b64`` missing or empty.
        500 -- transcription returned nothing, or the model output had no
               usable ``title``.
        200 -- ``{"ok": True, "transcript", "saved", "article"}``.
    """
    body = request.get_json(silent=True)
    # Only a JSON object can carry the fields read below.
    if not isinstance(body, dict):
        body = {}

    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not audio_b64:
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400

    transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)
    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed"}), 500

    raw = _gemini_text(SOLUTION_EXTRACTION_PROMPT + transcript, json_mode=True)
    article = _safe_json(raw, {})
    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution from transcript"}), 500

    if ticket_id:
        # The model may return "tags": null or a bare string; setdefault()
        # would hand that back unchanged and .append() would then fail.
        tags = article.get("tags")
        if not isinstance(tags, list):
            tags = [tags] if isinstance(tags, str) and tags else []
        tags.append(f"ticket:{ticket_id}")
        article["tags"] = tags

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")
    return jsonify({"ok": True, "transcript": transcript, "saved": stats["saved"], "article": article})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
|
| 930 |
|
| 931 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 932 |
+
# FEATURE 6 — Iris Chatbot (RAG over KB + Tutorials)
|
| 933 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 934 |
|
| 935 |
def _search_kb(query: str, limit: int = 5) -> List[Dict]:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 936 |
if not db:
|
| 937 |
return []
|
|
|
|
| 938 |
query_terms = [t.lower() for t in query.split() if len(t) > 2]
|
|
|
|
| 939 |
try:
|
|
|
|
| 940 |
docs = db.collection("iris_kb_articles").order_by(
|
| 941 |
"created_at", direction=firestore.Query.DESCENDING
|
| 942 |
).limit(200).stream()
|
|
|
|
| 943 |
results = []
|
| 944 |
for doc in docs:
|
| 945 |
d = doc.to_dict()
|
|
|
|
| 947 |
score = sum(1 for term in query_terms if term in text)
|
| 948 |
if score > 0:
|
| 949 |
results.append({"score": score, **d})
|
|
|
|
| 950 |
results.sort(key=lambda x: x["score"], reverse=True)
|
| 951 |
return results[:limit]
|
|
|
|
| 952 |
except Exception as e:
|
| 953 |
logger.error("KB search error: %s", e)
|
| 954 |
return []
|
| 955 |
|
| 956 |
|
| 957 |
+
CHATBOT_SYSTEM_PROMPT = """You are Iris, an intelligent support assistant for the Iris Support Portal.
|
|
|
|
| 958 |
|
| 959 |
+
Answer ONLY from the provided knowledge base context.
|
| 960 |
+
If the answer is in a tutorial with a timestamp, mention the video and timestamp.
|
| 961 |
+
Be concise, clear, and friendly. Format step-by-step answers as numbered lists.
|
| 962 |
+
If you cannot find the answer, say so honestly and suggest submitting a ticket.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 963 |
"""
|
| 964 |
|
| 965 |
@app.post("/api/chatbot/query")
|
| 966 |
def chatbot_query():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 967 |
body = request.get_json(silent=True) or {}
|
| 968 |
message = body.get("message", "").strip()
|
| 969 |
session_id = body.get("session_id", "default")
|
| 970 |
user_id = body.get("user_id", "anonymous")
|
|
|
|
| 971 |
if not message:
|
| 972 |
return jsonify({"ok": False, "error": "message is required"}), 400
|
|
|
|
|
|
|
| 973 |
kb_results = _search_kb(message, limit=5)
|
|
|
|
| 974 |
context_blocks = []
|
| 975 |
sources = []
|
| 976 |
for r in kb_results:
|
| 977 |
block = f"[Article: {r.get('title')}]\n{r.get('content', '')}"
|
| 978 |
if r.get("timestamp_start") is not None:
|
| 979 |
+
ts = r["timestamp_start"]
|
| 980 |
+
block += f"\n(Tutorial: {r.get('video_title','Video')} at {ts//60:02d}:{ts%60:02d}"
|
| 981 |
+
if r.get("video_url"):
|
| 982 |
+
block += f" — {r['video_url']}"
|
| 983 |
+
block += ")"
|
|
|
|
| 984 |
context_blocks.append(block)
|
| 985 |
sources.append({
|
| 986 |
"title": r.get("title"),
|
|
|
|
| 989 |
"ts_start": r.get("timestamp_start"),
|
| 990 |
"video_url": r.get("video_url"),
|
| 991 |
})
|
| 992 |
+
context_str = "\n\n---\n\n".join(context_blocks) if context_blocks else "No relevant articles found."
|
| 993 |
+
full_prompt = f"{CHATBOT_SYSTEM_PROMPT}\n\nKNOWLEDGE BASE CONTEXT:\n{context_str}\n\nUSER QUESTION: {message}\n\nAnswer:"
|
| 994 |
+
answer = _gemini_text(full_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
if not answer:
|
| 996 |
+
answer = "Sorry, I could not process your question right now. Please try again or submit a support ticket."
|
|
|
|
|
|
|
| 997 |
if db:
|
| 998 |
db.collection("iris_chatbot_logs").add({
|
| 999 |
+
"user_id": user_id, "session_id": session_id,
|
| 1000 |
+
"message": message, "answer": answer, "sources": sources,
|
|
|
|
|
|
|
|
|
|
| 1001 |
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 1002 |
})
|
| 1003 |
+
return jsonify({"ok": True, "answer": answer, "sources": sources})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
|
| 1005 |
|
| 1006 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 1007 |
+
# KB READ / DELETE ENDPOINTS
|
| 1008 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 1009 |
|
| 1010 |
@app.get("/api/kb/articles")
def list_kb_articles():
    """List KB articles, newest first, optionally filtered by category.

    Query parameters:
        category -- optional exact-match filter on the ``category`` field.
        limit    -- optional maximum number of articles (default 50); must
                    be a non-negative integer.

    Responses:
        400 -- ``limit`` is not a non-negative integer.
        500 -- Firestore is unavailable or the query failed.
        200 -- ``{"ok": True, "articles": [...], "count": n}`` where each
               article is the stored document plus its ``id``.
    """
    category = request.args.get("category", "")

    # Parse the limit defensively: it previously sat outside the try block,
    # so "?limit=abc" surfaced as an unhandled server error.
    try:
        limit = int(request.args.get("limit", 50))
    except (TypeError, ValueError):
        return jsonify({"ok": False, "error": "limit must be an integer"}), 400
    if limit < 0:
        return jsonify({"ok": False, "error": "limit must not be negative"}), 400

    if not db:
        return jsonify({"ok": False, "error": "Firebase unavailable"}), 500

    try:
        query = db.collection("iris_kb_articles").order_by("created_at", direction=firestore.Query.DESCENDING)
        if category:
            query = query.where("category", "==", category)
        docs = query.limit(limit).stream()
        articles = [{"id": d.id, **d.to_dict()} for d in docs]
        return jsonify({"ok": True, "articles": articles, "count": len(articles)})
    except Exception as e:
        # Record the failure server-side as well; the response alone is easy
        # to miss when diagnosing Firestore problems.
        logger.error("KB list error: %s", e)
        return jsonify({"ok": False, "error": str(e)}), 500
|
| 1025 |
|
| 1026 |
|
| 1027 |
@app.delete("/api/kb/articles/<article_id>")
|
| 1028 |
def delete_kb_article(article_id: str):
|
|
|
|
| 1029 |
if not db:
|
| 1030 |
return jsonify({"ok": False, "error": "Firebase unavailable"}), 500
|
| 1031 |
try:
|
|
|
|
| 1048 |
article_count = docs[0][0].value
|
| 1049 |
except Exception:
|
| 1050 |
pass
|
|
|
|
| 1051 |
return jsonify({
|
| 1052 |
+
"ok": True,
|
| 1053 |
+
"service": "Iris AI Service v1.1",
|
| 1054 |
+
"model": GEMINI_MODEL,
|
| 1055 |
+
"gemini": bool(_gemini_client),
|
| 1056 |
+
"assemblyai": bool(ASSEMBLYAI_API_KEY),
|
| 1057 |
+
"firebase": bool(db),
|
| 1058 |
+
"kb_articles": article_count,
|
| 1059 |
})
|
| 1060 |
|
| 1061 |
|
|
|
|
| 1065 |
|
| 1066 |
if __name__ == "__main__":
    # Script entry point: read the listen port from the PORT environment
    # variable (7860 when unset), announce startup, then serve on all
    # interfaces.
    listen_port = int(os.environ.get("PORT", 7860))
    logger.info(
        "Iris AI Service v1.1 starting on port %d (model=%s)",
        listen_port,
        GEMINI_MODEL,
    )
    app.run(host="0.0.0.0", port=listen_port)
|