Spaces:
Sleeping
Sleeping
fix: disable redirect_slashes to prevent mixed content errors behind HF proxy
Browse files
- app/database.py +138 -10
- app/main.py +1 -0
- app/routers/document_router.py +6 -2
- app/routers/gig_router.py +14 -7
app/database.py
CHANGED
|
@@ -339,6 +339,95 @@ class AsyncCollection:
|
|
| 339 |
_json_dumps(payload),
|
| 340 |
)
|
| 341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
async def _find_docs(
|
| 343 |
self,
|
| 344 |
query: dict[str, Any] | None,
|
|
@@ -347,18 +436,57 @@ class AsyncCollection:
|
|
| 347 |
skip: int = 0,
|
| 348 |
limit: int | None = None,
|
| 349 |
) -> list[dict[str, Any]]:
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
|
|
|
|
|
|
| 360 |
|
| 361 |
-
return [_apply_projection(doc, projection) for doc in
|
| 362 |
|
| 363 |
def find(self, query: dict[str, Any] | None = None, projection: dict[str, Any] | None = None) -> AsyncCursor:
|
| 364 |
return AsyncCursor(self, query, projection)
|
|
|
|
| 339 |
_json_dumps(payload),
|
| 340 |
)
|
| 341 |
|
| 342 |
+
@staticmethod
def _build_sql_conditions(
    query: dict[str, Any] | None,
) -> tuple[list[str], list[Any], bool]:
    """Translate a MongoDB-style query into SQL WHERE fragments where that is safe.

    Returns ``(conditions, params, needs_python_filter)``:

    * ``conditions`` are SQL fragments meant to be joined with ``AND``.
    * ``params`` are the positional values for the ``$1, $2, ...``
      placeholders, in order.  Every bound value is a string because
      ``data->>'field'`` yields text.
    * ``needs_python_filter`` is True whenever any part of the query could
      not be expressed faithfully in SQL.  The caller must then re-check the
      fetched rows with ``_matches_query`` for correctness.

    Anything that cannot be pushed down with confidence is left out of the
    SQL and flagged for Python filtering instead of being approximated:

    * field names that are not plain ``[A-Za-z0-9_]`` identifiers (this
      covers ``$or``/``$and``, dotted paths and anything containing quotes),
      because the name is embedded in the SQL text itself;
    * values that are not strings, booleans or integers after
      ``_normalize_scalar`` (floats, lists, dicts, ...), because their
      Python text form is not guaranteed to equal the JSONB text form;
    * range operators on non-string operands, because comparing
      ``data->>'field'`` is a text comparison and would order numbers
      lexicographically (``'9' > '10'``);
    * ``$regex``/``$options`` and any unknown operator.

    NOTE(review): integer equality compares decimal text, so a document that
    stores ``5.0`` will not match a pushed-down query for ``5`` -- confirm
    this is acceptable for the stored data.
    """
    if not query:
        return [], [], False

    conditions: list[str] = []
    params: list[Any] = []
    needs_python = False
    range_ops = {"$gt": ">", "$gte": ">=", "$lt": "<", "$lte": "<="}

    def bind(text: str) -> str:
        """Register ``text`` as the next positional parameter; return its ``$N``."""
        params.append(text)
        return f"${len(params)}"

    def jsonb_text(value: Any) -> str | None:
        """Return the text ``data->>'field'`` would yield for ``value``.

        Returns None when the value cannot be compared reliably as text.
        """
        # bool must be handled before int (bool is an int subclass) and
        # before str(): Python renders 'True', JSONB text is 'true'.
        if isinstance(value, bool):
            return "true" if value else "false"
        if value is None:
            return None
        normalized = _normalize_scalar(value)
        if isinstance(normalized, bool):
            return "true" if normalized else "false"
        if isinstance(normalized, (str, int)):
            return str(normalized)
        return None

    for key, expected in query.items():
        if key == "_id":
            # _id is a real column, not part of the JSONB payload.
            if isinstance(expected, (str, ObjectId)):
                placeholder = bind(str(_normalize_scalar(expected)))
                conditions.append(f"_id = {placeholder}")
            else:
                needs_python = True
            continue

        # The field name is interpolated into the SQL string literal, so only
        # plain identifiers are allowed through.  Logical operators ($or,
        # $and, ...), dotted paths and names with quotes are left to Python.
        if not (
            isinstance(key, str)
            and key.isascii()
            and key.replace("_", "").isalnum()
        ):
            needs_python = True
            continue

        field = f"data->>'{key}'"

        if not _is_operator_dict(expected):
            # Simple equality.
            if expected is None:
                conditions.append(f"{field} IS NULL")
                continue
            text = jsonb_text(expected)
            if text is None:
                needs_python = True
            else:
                conditions.append(f"{field} = {bind(text)}")
            continue

        # Operator dict: push down what we can, flag the rest.
        for op, op_value in expected.items():
            if op == "$ne":
                if op_value is None:
                    # "not null": ->> yields SQL NULL for both a missing key
                    # and a JSON null.
                    conditions.append(f"{field} IS NOT NULL")
                    continue
                text = jsonb_text(op_value)
                if text is None:
                    needs_python = True
                else:
                    # A NULL field is "not equal" too, so keep those rows.
                    conditions.append(
                        f"({field} IS NULL OR {field} != {bind(text)})"
                    )
            elif op == "$in":
                texts = (
                    [jsonb_text(item) for item in op_value]
                    if isinstance(op_value, list)
                    else []
                )
                if texts and all(text is not None for text in texts):
                    placeholders = ", ".join(bind(text) for text in texts)
                    conditions.append(f"{field} IN ({placeholders})")
                else:
                    # Empty list, non-list, or a member (None, float, ...)
                    # that has no reliable text form.
                    needs_python = True
            elif op in range_ops:
                normalized = _normalize_scalar(op_value)
                if isinstance(normalized, str):
                    conditions.append(
                        f"{field} {range_ops[op]} {bind(normalized)}"
                    )
                else:
                    # Text comparison would mis-order numeric operands.
                    needs_python = True
            else:
                # $regex, $options and anything unrecognised.
                needs_python = True

    return conditions, params, needs_python
|
| 430 |
+
|
| 431 |
async def _find_docs(
|
| 432 |
self,
|
| 433 |
query: dict[str, Any] | None,
|
|
|
|
| 436 |
skip: int = 0,
|
| 437 |
limit: int | None = None,
|
| 438 |
) -> list[dict[str, Any]]:
|
| 439 |
+
conditions, params, needs_python = self._build_sql_conditions(query)
|
| 440 |
+
|
| 441 |
+
sql = f'SELECT _id, data FROM "{self._name}"'
|
| 442 |
+
if conditions:
|
| 443 |
+
sql += " WHERE " + " AND ".join(conditions)
|
| 444 |
+
|
| 445 |
+
# Push sort to SQL when possible (single sort field)
|
| 446 |
+
sort_list = list(sort_fields or [])
|
| 447 |
+
sql_sorted = False
|
| 448 |
+
if sort_list and not needs_python:
|
| 449 |
+
order_clauses = []
|
| 450 |
+
for field, direction in sort_list:
|
| 451 |
+
dir_str = "DESC" if int(direction) == -1 else "ASC"
|
| 452 |
+
order_clauses.append(f"data->>'{field}' {dir_str}")
|
| 453 |
+
sql += " ORDER BY " + ", ".join(order_clauses)
|
| 454 |
+
sql_sorted = True
|
| 455 |
+
|
| 456 |
+
# Push limit/skip to SQL when no Python filtering needed
|
| 457 |
+
if not needs_python and sql_sorted:
|
| 458 |
+
if skip:
|
| 459 |
+
sql += f" OFFSET {max(0, int(skip))}"
|
| 460 |
+
if limit is not None:
|
| 461 |
+
sql += f" LIMIT {max(0, int(limit))}"
|
| 462 |
+
|
| 463 |
+
rows = await self._database.pool.fetch(sql, *params)
|
| 464 |
+
documents: list[dict[str, Any]] = []
|
| 465 |
+
for row in rows:
|
| 466 |
+
data = _json_loads(row["data"]) or {}
|
| 467 |
+
data["_id"] = row["_id"]
|
| 468 |
+
documents.append(data)
|
| 469 |
+
|
| 470 |
+
# If we needed Python filtering, apply it now on the narrowed set
|
| 471 |
+
if needs_python:
|
| 472 |
+
documents = [doc for doc in documents if _matches_query(doc, query or {})]
|
| 473 |
+
|
| 474 |
+
# If sorting wasn't done in SQL, do it in Python
|
| 475 |
+
if not sql_sorted and sort_list:
|
| 476 |
+
for field, direction in reversed(sort_list):
|
| 477 |
+
documents.sort(
|
| 478 |
+
key=lambda item: _sort_key(item.get(field)),
|
| 479 |
+
reverse=int(direction) == -1,
|
| 480 |
+
)
|
| 481 |
|
| 482 |
+
# If skip/limit weren't pushed to SQL, apply in Python
|
| 483 |
+
if needs_python or not sql_sorted:
|
| 484 |
+
if skip:
|
| 485 |
+
documents = documents[max(0, int(skip)):]
|
| 486 |
+
if limit is not None:
|
| 487 |
+
documents = documents[:max(0, int(limit))]
|
| 488 |
|
| 489 |
+
return [_apply_projection(doc, projection) for doc in documents]
|
| 490 |
|
| 491 |
def find(self, query: dict[str, Any] | None = None, projection: dict[str, Any] | None = None) -> AsyncCursor:
    """Create an ``AsyncCursor`` over this collection for ``query`` and ``projection``."""
    cursor = AsyncCursor(self, query, projection)
    return cursor
|
app/main.py
CHANGED
|
@@ -16,6 +16,7 @@ app = FastAPI(
|
|
| 16 |
title=settings.APP_NAME,
|
| 17 |
description="Legal Document OCR + Translation + Expert Verification Platform",
|
| 18 |
version="1.0.0",
|
|
|
|
| 19 |
)
|
| 20 |
|
| 21 |
|
|
|
|
| 16 |
title=settings.APP_NAME,
|
| 17 |
description="Legal Document OCR + Translation + Expert Verification Platform",
|
| 18 |
version="1.0.0",
|
| 19 |
+
redirect_slashes=False,
|
| 20 |
)
|
| 21 |
|
| 22 |
|
app/routers/document_router.py
CHANGED
|
@@ -142,9 +142,13 @@ async def _assign_review_team(document_id: str, document_name: str) -> None:
|
|
| 142 |
).to_list(length=5000)
|
| 143 |
|
| 144 |
selected_linguist = random.choice(linguists) if linguists else None
|
| 145 |
-
selected_translator = random.choice(translators) if translators else None
|
| 146 |
-
|
| 147 |
linguist_id = str(selected_linguist["_id"]) if selected_linguist else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
translator_id = str(selected_translator["_id"]) if selected_translator else None
|
| 149 |
|
| 150 |
await db.documents.update_one(
|
|
|
|
| 142 |
).to_list(length=5000)
|
| 143 |
|
| 144 |
selected_linguist = random.choice(linguists) if linguists else None
|
|
|
|
|
|
|
| 145 |
linguist_id = str(selected_linguist["_id"]) if selected_linguist else None
|
| 146 |
+
|
| 147 |
+
# Ensure translator is a different person than the linguist
|
| 148 |
+
available_translators = [t for t in translators if str(t["_id"]) != linguist_id] if linguist_id else translators
|
| 149 |
+
if not available_translators and translators:
|
| 150 |
+
available_translators = translators # fallback if no other translator exists
|
| 151 |
+
selected_translator = random.choice(available_translators) if available_translators else None
|
| 152 |
translator_id = str(selected_translator["_id"]) if selected_translator else None
|
| 153 |
|
| 154 |
await db.documents.update_one(
|
app/routers/gig_router.py
CHANGED
|
@@ -86,14 +86,24 @@ async def _assign_missing_team_members() -> None:
|
|
| 86 |
)
|
| 87 |
async for doc in cursor:
|
| 88 |
updates = {}
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
selected = random.choice(linguists)
|
| 91 |
-
|
|
|
|
| 92 |
updates["assigned_linguist_name"] = selected.get("name", "Linguist")
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
updates["assigned_translator_id"] = str(selected["_id"])
|
| 96 |
updates["assigned_translator_name"] = selected.get("name", "Translator")
|
|
|
|
| 97 |
if updates:
|
| 98 |
await db.documents.update_one({"_id": doc["_id"]}, {"$set": updates})
|
| 99 |
|
|
@@ -230,9 +240,6 @@ async def submit_verification(
|
|
| 230 |
role = _normalized_reviewer_role(reviewer.get("role", ""))
|
| 231 |
if not role:
|
| 232 |
raise HTTPException(status_code=403, detail="Reviewer role required")
|
| 233 |
-
|
| 234 |
-
if role == "linguist" and (not req.notes or not req.notes.strip()):
|
| 235 |
-
raise HTTPException(status_code=400, detail="Linguistic experts must provide notes for the end user before submitting")
|
| 236 |
|
| 237 |
db = get_db()
|
| 238 |
try:
|
|
|
|
| 86 |
)
|
| 87 |
async for doc in cursor:
|
| 88 |
updates = {}
|
| 89 |
+
current_linguist_id = doc.get("assigned_linguist_id")
|
| 90 |
+
current_translator_id = doc.get("assigned_translator_id")
|
| 91 |
+
|
| 92 |
+
if not current_linguist_id and linguists:
|
| 93 |
selected = random.choice(linguists)
|
| 94 |
+
current_linguist_id = str(selected["_id"])
|
| 95 |
+
updates["assigned_linguist_id"] = current_linguist_id
|
| 96 |
updates["assigned_linguist_name"] = selected.get("name", "Linguist")
|
| 97 |
+
|
| 98 |
+
if not current_translator_id and translators:
|
| 99 |
+
# Ensure translator is different from linguist
|
| 100 |
+
available = [t for t in translators if str(t["_id"]) != current_linguist_id] if current_linguist_id else translators
|
| 101 |
+
if not available:
|
| 102 |
+
available = translators # fallback
|
| 103 |
+
selected = random.choice(available)
|
| 104 |
updates["assigned_translator_id"] = str(selected["_id"])
|
| 105 |
updates["assigned_translator_name"] = selected.get("name", "Translator")
|
| 106 |
+
|
| 107 |
if updates:
|
| 108 |
await db.documents.update_one({"_id": doc["_id"]}, {"$set": updates})
|
| 109 |
|
|
|
|
| 240 |
role = _normalized_reviewer_role(reviewer.get("role", ""))
|
| 241 |
if not role:
|
| 242 |
raise HTTPException(status_code=403, detail="Reviewer role required")
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
db = get_db()
|
| 245 |
try:
|