Spaces:
Running
Running
Oviya
committed on
Commit
·
a056eaa
1
Parent(s):
a1f6dcf
fix
Browse files - ragg/ingest_trigger.py +62 -3
ragg/ingest_trigger.py
CHANGED
|
@@ -41,6 +41,67 @@ def ingest_ping():
|
|
| 41 |
print(info, flush=True)
|
| 42 |
return jsonify(info), 200
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
@ingest_trigger_bp.route("/ingest", methods=["POST"])
|
| 45 |
def run_ingest():
|
| 46 |
if not _ingest_lock.acquire(blocking=False):
|
|
@@ -92,7 +153,7 @@ def run_ingest():
|
|
| 92 |
|
| 93 |
print("\n[DEBUG] Subprocess completed.", flush=True)
|
| 94 |
print("Return code:", result.returncode, flush=True)
|
| 95 |
-
|
| 96 |
def _preview(label, text, head=30, tail=30):
|
| 97 |
lines = (text or "").splitlines()
|
| 98 |
print(f"\n----- {label} (total lines: {len(lines)}) -----", flush=True)
|
|
@@ -110,7 +171,6 @@ def run_ingest():
|
|
| 110 |
payload = {
|
| 111 |
"status": "success" if result.returncode == 0 else "error",
|
| 112 |
"returncode": result.returncode,
|
| 113 |
-
# Return last ~4000 chars so the client can also see logs in Postman
|
| 114 |
"stdout": (result.stdout or "")[-4000:],
|
| 115 |
"stderr": (result.stderr or "")[-4000:],
|
| 116 |
}
|
|
@@ -134,5 +194,4 @@ def run_ingest():
|
|
| 134 |
try:
|
| 135 |
_ingest_lock.release()
|
| 136 |
except Exception:
|
| 137 |
-
# Avoid rare "release unlocked lock" noise
|
| 138 |
pass
|
|
|
|
| 41 |
print(info, flush=True)
|
| 42 |
return jsonify(info), 200
|
| 43 |
|
| 44 |
+
# ---------- LIGHTWEIGHT DEBUG (no ingestion run) ----------
|
| 45 |
+
@ingest_trigger_bp.route("/ingest/debug", methods=["GET"])
|
| 46 |
+
def ingest_debug():
|
| 47 |
+
import importlib
|
| 48 |
+
check_paths = ["ragg/pdfs/low", "ragg/pdfs/mid", "ragg/pdfs/high",
|
| 49 |
+
"pdfs/low", "pdfs/mid", "pdfs/high"]
|
| 50 |
+
paths_info = []
|
| 51 |
+
for p in check_paths:
|
| 52 |
+
sample = _list_dir_sample(p)
|
| 53 |
+
paths_info.append({"path": p, **sample})
|
| 54 |
+
|
| 55 |
+
mod_name = os.getenv("INGEST_MODULE", "ragg.ingest_all")
|
| 56 |
+
import_ok, callable_ok, import_error = False, False, None
|
| 57 |
+
try:
|
| 58 |
+
mod = importlib.import_module(mod_name)
|
| 59 |
+
import_ok = True
|
| 60 |
+
callable_ok = hasattr(mod, "ingest_all_levels")
|
| 61 |
+
except Exception as e:
|
| 62 |
+
import_error = f"{type(e).__name__}: {e}"
|
| 63 |
+
|
| 64 |
+
resp = {
|
| 65 |
+
"cwd": os.getcwd(),
|
| 66 |
+
"env": {
|
| 67 |
+
"ENV": os.getenv("ENV"),
|
| 68 |
+
"INGEST_MODULE": mod_name,
|
| 69 |
+
"CHROMA_DIR": os.getenv("CHROMA_DIR"),
|
| 70 |
+
"CHROMA_ROOT": os.getenv("CHROMA_ROOT"),
|
| 71 |
+
},
|
| 72 |
+
"paths": paths_info,
|
| 73 |
+
"import_ok": import_ok,
|
| 74 |
+
"callable_ok": callable_ok,
|
| 75 |
+
"import_error": import_error
|
| 76 |
+
}
|
| 77 |
+
print("\n=== [INGEST DEBUG] ===", flush=True)
|
| 78 |
+
print(resp, flush=True)
|
| 79 |
+
return jsonify(resp), 200
|
| 80 |
+
|
| 81 |
+
# ---------- INLINE RUN (use only for debugging) ----------
|
| 82 |
+
@ingest_trigger_bp.route("/ingest/run-inline", methods=["POST"])
|
| 83 |
+
def ingest_run_inline():
|
| 84 |
+
import importlib
|
| 85 |
+
try:
|
| 86 |
+
print("\n=== [INGEST INLINE] ===", flush=True)
|
| 87 |
+
mod_name = os.getenv("INGEST_MODULE", "ragg.ingest_all")
|
| 88 |
+
print("Importing module:", mod_name, flush=True)
|
| 89 |
+
mod = importlib.import_module(mod_name)
|
| 90 |
+
if not hasattr(mod, "ingest_all_levels"):
|
| 91 |
+
return jsonify({"status": "error", "message": "ingest_all_levels() not found"}), 500
|
| 92 |
+
|
| 93 |
+
# Optional: quick preflight
|
| 94 |
+
for p in ["ragg/pdfs/low", "ragg/pdfs/mid", "ragg/pdfs/high"]:
|
| 95 |
+
print(f"[INLINE] {p} -> {_list_dir_sample(p)}", flush=True)
|
| 96 |
+
|
| 97 |
+
mod.ingest_all_levels()
|
| 98 |
+
return jsonify({"status": "success", "message": "Ingest completed inline"}), 200
|
| 99 |
+
except Exception as e:
|
| 100 |
+
print("[ERROR][INLINE]", e, flush=True)
|
| 101 |
+
print(traceback.format_exc(), flush=True)
|
| 102 |
+
return jsonify({"status": "error", "error": f"{type(e).__name__}: {e}",
|
| 103 |
+
"traceback": traceback.format_exc()}), 500
|
| 104 |
+
|
| 105 |
@ingest_trigger_bp.route("/ingest", methods=["POST"])
|
| 106 |
def run_ingest():
|
| 107 |
if not _ingest_lock.acquire(blocking=False):
|
|
|
|
| 153 |
|
| 154 |
print("\n[DEBUG] Subprocess completed.", flush=True)
|
| 155 |
print("Return code:", result.returncode, flush=True)
|
| 156 |
+
|
| 157 |
def _preview(label, text, head=30, tail=30):
|
| 158 |
lines = (text or "").splitlines()
|
| 159 |
print(f"\n----- {label} (total lines: {len(lines)}) -----", flush=True)
|
|
|
|
| 171 |
payload = {
|
| 172 |
"status": "success" if result.returncode == 0 else "error",
|
| 173 |
"returncode": result.returncode,
|
|
|
|
| 174 |
"stdout": (result.stdout or "")[-4000:],
|
| 175 |
"stderr": (result.stderr or "")[-4000:],
|
| 176 |
}
|
|
|
|
| 194 |
try:
|
| 195 |
_ingest_lock.release()
|
| 196 |
except Exception:
|
|
|
|
| 197 |
pass
|