Oviya commited on
Commit
34efa08
·
1 Parent(s): 0dc5a09
Files changed (2) hide show
  1. ragg/ingest_all.py +2 -1
  2. ragg/ingest_trigger.py +38 -16
ragg/ingest_all.py CHANGED
@@ -1,6 +1,7 @@
1
  # ingest_all.py
2
  import os
3
  from ragg.rag_backend import ingest_pdfs_from_folder, get_embeddings
 
4
  from langchain_community.vectorstores import Chroma
5
 
6
  def ingest_all_levels():
@@ -37,7 +38,7 @@ def ingest_all_levels():
37
 
38
 
39
  # ✅ Temporarily replace the function used in rag_backend
40
- import rag_backend
41
  rag_backend.get_vectorstore = get_vectorstore_for_level
42
 
43
  # ✅ Ingest PDFs for this level
 
1
  # ingest_all.py
2
  import os
3
  from ragg.rag_backend import ingest_pdfs_from_folder, get_embeddings
4
+ # from ragg.rag_backend import ingest_pdfs_from_folder, get_embeddings
5
  from langchain_community.vectorstores import Chroma
6
 
7
  def ingest_all_levels():
 
38
 
39
 
40
  # ✅ Temporarily replace the function used in rag_backend
41
+ import ragg.rag_backend as rag_backend
42
  rag_backend.get_vectorstore = get_vectorstore_for_level
43
 
44
  # ✅ Ingest PDFs for this level
ragg/ingest_trigger.py CHANGED
@@ -1,32 +1,54 @@
1
  import os
2
- import subprocess
3
  import sys
4
- from flask import Blueprint, jsonify
 
 
 
5
 
6
  ingest_trigger_bp = Blueprint("ingest_trigger_bp", __name__)
7
 
 
 
 
 
 
 
 
8
  @ingest_trigger_bp.route("/ingest", methods=["POST"])
9
  def run_ingest():
10
- """
11
- Trigger the ingestion manually via Postman.
12
- Runs rag/ingest_all.py script inside the Hugging Face container.
13
- """
14
  try:
15
- script_path = os.path.join(os.path.dirname(__file__), "ingest_all.py")
16
- print(f"🔹 Running ingestion script: {script_path}")
 
 
 
 
 
17
 
18
  result = subprocess.run(
19
- [sys.executable, "-m", "rag.ingest_all"], # <-- run as package module
20
  capture_output=True,
21
  text=True,
22
  cwd=None,
23
- timeout=int(os.getenv("INGEST_TIMEOUT_SEC", "1800"))
24
  )
25
 
 
 
 
 
 
 
 
 
 
26
  return jsonify({
27
- "status": "success",
28
- "stdout": result.stdout,
29
- "stderr": result.stderr
30
- })
31
- except Exception as e:
32
- return jsonify({"status": "error", "message": str(e)}), 500
 
1
  import os
 
2
  import sys
3
+ import subprocess
4
+ import threading
5
+ import traceback
6
+ from flask import Blueprint, jsonify, request
7
 
8
  ingest_trigger_bp = Blueprint("ingest_trigger_bp", __name__)
9
 
10
+ # Prevent concurrent executions
11
+ _ingest_lock = threading.Lock()
12
+
13
+ @ingest_trigger_bp.route("/ingest/ping", methods=["GET"])
14
+ def ingest_ping():
15
+ return jsonify({"ok": True, "cwd": os.getcwd()}), 200
16
+
17
  @ingest_trigger_bp.route("/ingest", methods=["POST"])
18
  def run_ingest():
19
+ if not _ingest_lock.acquire(blocking=False):
20
+ return jsonify({"status": "busy", "message": "Ingestion already in progress"}), 409
21
+
 
22
  try:
23
+ # Run as module to ensure package imports work
24
+ module_name = os.getenv("INGEST_MODULE", "ragg.ingest_all")
25
+
26
+ print("Trigger called", flush=True)
27
+ print("Module:", module_name, flush=True)
28
+ print("CWD:", os.getcwd(), flush=True)
29
+ print("PYTHONPATH:", sys.path, flush=True)
30
 
31
  result = subprocess.run(
32
+ [sys.executable, "-m", module_name],
33
  capture_output=True,
34
  text=True,
35
  cwd=None,
36
+ timeout=int(os.getenv("INGEST_TIMEOUT_SEC", "1800")) # default 30 min
37
  )
38
 
39
+ payload = {
40
+ "status": "success" if result.returncode == 0 else "error",
41
+ "returncode": result.returncode,
42
+ "stdout": result.stdout[-20000:], # trim if very long
43
+ "stderr": result.stderr[-20000:],
44
+ }
45
+ return jsonify(payload), 200 if result.returncode == 0 else 500
46
+
47
+ except Exception:
48
  return jsonify({
49
+ "status": "error",
50
+ "message": "trigger crashed",
51
+ "traceback": traceback.format_exc()
52
+ }), 500
53
+ finally:
54
+ _ingest_lock.release()