Spaces:

Shubham170793
/

enterprise-knowledge-assistant

Running

App Files Community

Shubham170793 committed on Oct 19

Commit

0dc8e87

verified ·

1 Parent(s): cb020cf

Update src/ingestion.py

Browse files

Files changed (1) hide show

src/ingestion.py +19 -5

src/ingestion.py CHANGED Viewed

@@ -131,19 +131,20 @@ from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
 def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
     """
     Uses SAP GenAI Hub proxy (same as QA pipeline) to infer a Table of Contents.
-    This avoids manual auth and ensures consistent credentials across the app.
     """
     snippet = text[:7000]
-    # ✅ Read GenAI proxy credentials (same JSON used by QA)
-    creds_path = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
     base_url = ""
     if os.path.exists(creds_path):
         try:
             with open(creds_path, "r") as f:
                 creds = json.load(f)
-                # Try all known locations for base URL
                 base_url = (
                     creds.get("base_url")
                     or creds.get("serviceurls", {}).get("AI_API_URL", "")
@@ -151,11 +152,23 @@ def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
                 )
         except Exception as e:
             print(f"⚠️ Could not read GenAI proxy credentials: {e}")
     if not base_url:
         print("⚠️ Missing AI_API_URL or base_url in credentials — skipping fallback.")
         return []
     try:
         print(f"⚙️ Invoking GenAI proxy for TOC inference using model: {model_name}")
         proxy_client = get_proxy_client("gen-ai-hub", base_url=base_url)
@@ -179,7 +192,7 @@ def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
         response = llm.invoke(prompt)
         response_text = getattr(response, "content", str(response))
-        # Extract clean TOC-like lines
         lines = [
             re.sub(r"^[0-9.\-•\s]+", "", l.strip())
             for l in response_text.splitlines()
@@ -195,6 +208,7 @@ def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
         return []
 # ==========================================================
 # 3B️⃣ UNIFIED WRAPPER (Heuristic + AI Hybrid)
 # ==========================================================

 def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
     """
     Uses SAP GenAI Hub proxy (same as QA pipeline) to infer a Table of Contents.
+    This ensures consistent credentials, no manual token handling, and safe reuse
+    of your existing GEN AI HUB PROXY.json configuration.
     """
     snippet = text[:7000]
+    creds = {}
     base_url = ""
+    # ✅ Load credentials from same JSON as QA pipeline
+    creds_path = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
     if os.path.exists(creds_path):
         try:
             with open(creds_path, "r") as f:
                 creds = json.load(f)
                 base_url = (
                     creds.get("base_url")
                     or creds.get("serviceurls", {}).get("AI_API_URL", "")
                 )
         except Exception as e:
             print(f"⚠️ Could not read GenAI proxy credentials: {e}")
+    else:
+        print("⚠️ No SAP GenAI credentials file found — skipping AI fallback.")
+        return []
     if not base_url:
         print("⚠️ Missing AI_API_URL or base_url in credentials — skipping fallback.")
         return []
+    # ✅ Inject credentials into environment (matches QA setup)
+    os.environ.update({
+        "AICORE_AUTH_URL": creds.get("url", ""),
+        "AICORE_CLIENT_ID": creds.get("clientid") or creds.get("client_id", ""),
+        "AICORE_CLIENT_SECRET": creds.get("clientsecret") or creds.get("client_secret", ""),
+        "AICORE_RESOURCE_GROUP": "default",
+        "AICORE_BASE_URL": base_url
+    })
     try:
         print(f"⚙️ Invoking GenAI proxy for TOC inference using model: {model_name}")
         proxy_client = get_proxy_client("gen-ai-hub", base_url=base_url)
         response = llm.invoke(prompt)
         response_text = getattr(response, "content", str(response))
+        # ✅ Extract clean TOC-like lines
         lines = [
             re.sub(r"^[0-9.\-•\s]+", "", l.strip())
             for l in response_text.splitlines()
         return []
 # ==========================================================
 # 3B️⃣ UNIFIED WRAPPER (Heuristic + AI Hybrid)
 # ==========================================================