arnavam committed on
Commit
c4757a8
·
1 Parent(s): 6394504

fix: disable redirect_slashes to prevent mixed content errors behind HF proxy

Browse files
app/database.py CHANGED
@@ -339,6 +339,95 @@ class AsyncCollection:
339
  _json_dumps(payload),
340
  )
341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  async def _find_docs(
343
  self,
344
  query: dict[str, Any] | None,
@@ -347,18 +436,57 @@ class AsyncCollection:
347
  skip: int = 0,
348
  limit: int | None = None,
349
  ) -> list[dict[str, Any]]:
350
- documents = await self._fetch_all_documents()
351
- matched = [doc for doc in documents if _matches_query(doc, query or {})]
352
-
353
- for field, direction in reversed(list(sort_fields or [])):
354
- matched.sort(key=lambda item: _sort_key(item.get(field)), reverse=int(direction) == -1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
- if skip:
357
- matched = matched[max(0, int(skip)) :]
358
- if limit is not None:
359
- matched = matched[: max(0, int(limit))]
 
 
360
 
361
- return [_apply_projection(doc, projection) for doc in matched]
362
 
363
  def find(self, query: dict[str, Any] | None = None, projection: dict[str, Any] | None = None) -> AsyncCursor:
364
  return AsyncCursor(self, query, projection)
 
339
  _json_dumps(payload),
340
  )
341
 
342
+ @staticmethod
343
+ def _build_sql_conditions(
344
+ query: dict[str, Any] | None,
345
+ ) -> tuple[list[str], list[Any], bool]:
346
+ """Try to convert MongoDB-style query to SQL WHERE clauses.
347
+
348
+ Returns (conditions, params, needs_python_filter).
349
+ If needs_python_filter is True, the SQL result must still be
350
+ filtered in Python with _matches_query for correctness.
351
+ """
352
+ if not query:
353
+ return [], [], False
354
+
355
+ conditions: list[str] = []
356
+ params: list[Any] = []
357
+ needs_python = False
358
+ idx = 1 # $1, $2, ... param counter
359
+
360
+ for key, expected in query.items():
361
+ if key in ("$or", "$and"):
362
+ needs_python = True
363
+ continue
364
+
365
+ if key == "_id":
366
+ if isinstance(expected, (str, ObjectId)):
367
+ conditions.append(f"_id = ${idx}")
368
+ params.append(str(_normalize_scalar(expected)))
369
+ idx += 1
370
+ else:
371
+ needs_python = True
372
+ continue
373
+
374
+ if not _is_operator_dict(expected):
375
+ # Simple equality: data->>'field' = $N
376
+ # Booleans need special handling: Python str(True)='True', JSONB text='true'
377
+ if expected is True:
378
+ conditions.append(f"data->>'{key}' = 'true'")
379
+ elif expected is False:
380
+ conditions.append(f"data->>'{key}' = 'false'")
381
+ elif expected is None:
382
+ conditions.append(f"data->>'{key}' IS NULL")
383
+ else:
384
+ conditions.append(f"data->>'{key}' = ${idx}")
385
+ params.append(str(_normalize_scalar(expected)))
386
+ idx += 1
387
+ continue
388
+
389
+ # Operator dict
390
+ for op, op_value in expected.items():
391
+ if op == "$ne":
392
+ if op_value is True:
393
+ conditions.append(
394
+ f"(data->>'{key}' IS NULL OR data->>'{key}' != 'true')"
395
+ )
396
+ elif op_value is False:
397
+ conditions.append(
398
+ f"(data->>'{key}' IS NULL OR data->>'{key}' != 'false')"
399
+ )
400
+ else:
401
+ conditions.append(
402
+ f"(data->>'{key}' IS NULL OR data->>'{key}' != ${idx})"
403
+ )
404
+ params.append(str(_normalize_scalar(op_value)))
405
+ idx += 1
406
+ elif op == "$in":
407
+ if isinstance(op_value, list) and op_value:
408
+ placeholders = ", ".join(
409
+ f"${idx + i}" for i in range(len(op_value))
410
+ )
411
+ conditions.append(
412
+ f"data->>'{key}' IN ({placeholders})"
413
+ )
414
+ for v in op_value:
415
+ params.append(str(_normalize_scalar(v)))
416
+ idx += 1
417
+ else:
418
+ needs_python = True
419
+ elif op in ("$gt", "$gte", "$lt", "$lte"):
420
+ sql_op = {"$gt": ">", "$gte": ">=", "$lt": "<", "$lte": "<="}[op]
421
+ conditions.append(f"data->>'{key}' {sql_op} ${idx}")
422
+ params.append(str(_normalize_scalar(op_value)))
423
+ idx += 1
424
+ elif op in ("$regex", "$options"):
425
+ needs_python = True
426
+ else:
427
+ needs_python = True
428
+
429
+ return conditions, params, needs_python
430
+
431
  async def _find_docs(
432
  self,
433
  query: dict[str, Any] | None,
 
436
  skip: int = 0,
437
  limit: int | None = None,
438
  ) -> list[dict[str, Any]]:
439
+ conditions, params, needs_python = self._build_sql_conditions(query)
440
+
441
+ sql = f'SELECT _id, data FROM "{self._name}"'
442
+ if conditions:
443
+ sql += " WHERE " + " AND ".join(conditions)
444
+
445
+ # Push sort to SQL (all sort fields) only when no Python-side filtering is needed
446
+ sort_list = list(sort_fields or [])
447
+ sql_sorted = False
448
+ if sort_list and not needs_python:
449
+ order_clauses = []
450
+ for field, direction in sort_list:
451
+ dir_str = "DESC" if int(direction) == -1 else "ASC"
452
+ order_clauses.append(f"data->>'{field}' {dir_str}")
453
+ sql += " ORDER BY " + ", ".join(order_clauses)
454
+ sql_sorted = True
455
+
456
+ # Push limit/skip to SQL only when no Python filtering is needed and the sort ran in SQL
457
+ if not needs_python and sql_sorted:
458
+ if skip:
459
+ sql += f" OFFSET {max(0, int(skip))}"
460
+ if limit is not None:
461
+ sql += f" LIMIT {max(0, int(limit))}"
462
+
463
+ rows = await self._database.pool.fetch(sql, *params)
464
+ documents: list[dict[str, Any]] = []
465
+ for row in rows:
466
+ data = _json_loads(row["data"]) or {}
467
+ data["_id"] = row["_id"]
468
+ documents.append(data)
469
+
470
+ # If we needed Python filtering, apply it now on the narrowed set
471
+ if needs_python:
472
+ documents = [doc for doc in documents if _matches_query(doc, query or {})]
473
+
474
+ # If sorting wasn't done in SQL, do it in Python
475
+ if not sql_sorted and sort_list:
476
+ for field, direction in reversed(sort_list):
477
+ documents.sort(
478
+ key=lambda item: _sort_key(item.get(field)),
479
+ reverse=int(direction) == -1,
480
+ )
481
 
482
+ # If skip/limit weren't pushed to SQL, apply in Python
483
+ if needs_python or not sql_sorted:
484
+ if skip:
485
+ documents = documents[max(0, int(skip)):]
486
+ if limit is not None:
487
+ documents = documents[:max(0, int(limit))]
488
 
489
+ return [_apply_projection(doc, projection) for doc in documents]
490
 
491
  def find(self, query: dict[str, Any] | None = None, projection: dict[str, Any] | None = None) -> AsyncCursor:
492
  return AsyncCursor(self, query, projection)
app/main.py CHANGED
@@ -16,6 +16,7 @@ app = FastAPI(
16
  title=settings.APP_NAME,
17
  description="Legal Document OCR + Translation + Expert Verification Platform",
18
  version="1.0.0",
 
19
  )
20
 
21
 
 
16
  title=settings.APP_NAME,
17
  description="Legal Document OCR + Translation + Expert Verification Platform",
18
  version="1.0.0",
19
+ redirect_slashes=False,
20
  )
21
 
22
 
app/routers/document_router.py CHANGED
@@ -142,9 +142,13 @@ async def _assign_review_team(document_id: str, document_name: str) -> None:
142
  ).to_list(length=5000)
143
 
144
  selected_linguist = random.choice(linguists) if linguists else None
145
- selected_translator = random.choice(translators) if translators else None
146
-
147
  linguist_id = str(selected_linguist["_id"]) if selected_linguist else None
 
 
 
 
 
 
148
  translator_id = str(selected_translator["_id"]) if selected_translator else None
149
 
150
  await db.documents.update_one(
 
142
  ).to_list(length=5000)
143
 
144
  selected_linguist = random.choice(linguists) if linguists else None
 
 
145
  linguist_id = str(selected_linguist["_id"]) if selected_linguist else None
146
+
147
+ # Ensure translator is a different person than the linguist
148
+ available_translators = [t for t in translators if str(t["_id"]) != linguist_id] if linguist_id else translators
149
+ if not available_translators and translators:
150
+ available_translators = translators # fallback if no other translator exists
151
+ selected_translator = random.choice(available_translators) if available_translators else None
152
  translator_id = str(selected_translator["_id"]) if selected_translator else None
153
 
154
  await db.documents.update_one(
app/routers/gig_router.py CHANGED
@@ -86,14 +86,24 @@ async def _assign_missing_team_members() -> None:
86
  )
87
  async for doc in cursor:
88
  updates = {}
89
- if not doc.get("assigned_linguist_id") and linguists:
 
 
 
90
  selected = random.choice(linguists)
91
- updates["assigned_linguist_id"] = str(selected["_id"])
 
92
  updates["assigned_linguist_name"] = selected.get("name", "Linguist")
93
- if not doc.get("assigned_translator_id") and translators:
94
- selected = random.choice(translators)
 
 
 
 
 
95
  updates["assigned_translator_id"] = str(selected["_id"])
96
  updates["assigned_translator_name"] = selected.get("name", "Translator")
 
97
  if updates:
98
  await db.documents.update_one({"_id": doc["_id"]}, {"$set": updates})
99
 
@@ -230,9 +240,6 @@ async def submit_verification(
230
  role = _normalized_reviewer_role(reviewer.get("role", ""))
231
  if not role:
232
  raise HTTPException(status_code=403, detail="Reviewer role required")
233
-
234
- if role == "linguist" and (not req.notes or not req.notes.strip()):
235
- raise HTTPException(status_code=400, detail="Linguistic experts must provide notes for the end user before submitting")
236
 
237
  db = get_db()
238
  try:
 
86
  )
87
  async for doc in cursor:
88
  updates = {}
89
+ current_linguist_id = doc.get("assigned_linguist_id")
90
+ current_translator_id = doc.get("assigned_translator_id")
91
+
92
+ if not current_linguist_id and linguists:
93
  selected = random.choice(linguists)
94
+ current_linguist_id = str(selected["_id"])
95
+ updates["assigned_linguist_id"] = current_linguist_id
96
  updates["assigned_linguist_name"] = selected.get("name", "Linguist")
97
+
98
+ if not current_translator_id and translators:
99
+ # Ensure translator is different from linguist
100
+ available = [t for t in translators if str(t["_id"]) != current_linguist_id] if current_linguist_id else translators
101
+ if not available:
102
+ available = translators # fallback
103
+ selected = random.choice(available)
104
  updates["assigned_translator_id"] = str(selected["_id"])
105
  updates["assigned_translator_name"] = selected.get("name", "Translator")
106
+
107
  if updates:
108
  await db.documents.update_one({"_id": doc["_id"]}, {"$set": updates})
109
 
 
240
  role = _normalized_reviewer_role(reviewer.get("role", ""))
241
  if not role:
242
  raise HTTPException(status_code=403, detail="Reviewer role required")
 
 
 
243
 
244
  db = get_db()
245
  try: