Update src/ingestion.py
Browse files
- src/ingestion.py +25 -2
src/ingestion.py
CHANGED
|
@@ -135,9 +135,31 @@ def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
|
|
| 135 |
"""
|
| 136 |
snippet = text[:7000]
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
try:
|
| 139 |
print(f"⚙️ Invoking GenAI proxy for TOC inference using model: {model_name}")
|
| 140 |
-
proxy_client = get_proxy_client("gen-ai-hub")
|
|
|
|
| 141 |
llm = ChatOpenAI(
|
| 142 |
proxy_model_name=model_name,
|
| 143 |
proxy_client=proxy_client,
|
|
@@ -155,7 +177,7 @@ def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
|
|
| 155 |
"""
|
| 156 |
|
| 157 |
response = llm.invoke(prompt)
|
| 158 |
-
response_text =
|
| 159 |
|
| 160 |
# Extract clean TOC-like lines
|
| 161 |
lines = [
|
|
@@ -172,6 +194,7 @@ def adaptive_fallback_toc(text: str, model_name: str = "gpt-4o"):
|
|
| 172 |
print(f"⚠️ AI TOC fallback failed via GenAI proxy: {e}")
|
| 173 |
return []
|
| 174 |
|
|
|
|
| 175 |
# ==========================================================
|
| 176 |
# 3B️⃣ UNIFIED WRAPPER (Heuristic + AI Hybrid)
|
| 177 |
# ==========================================================
|
|
|
|
| 135 |
"""
|
| 136 |
snippet = text[:7000]
|
| 137 |
|
| 138 |
+
# ✅ Read GenAI proxy credentials (same JSON used by QA)
|
| 139 |
+
creds_path = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
|
| 140 |
+
base_url = ""
|
| 141 |
+
|
| 142 |
+
if os.path.exists(creds_path):
|
| 143 |
+
try:
|
| 144 |
+
with open(creds_path, "r") as f:
|
| 145 |
+
creds = json.load(f)
|
| 146 |
+
# Try all known locations for base URL
|
| 147 |
+
base_url = (
|
| 148 |
+
creds.get("base_url")
|
| 149 |
+
or creds.get("serviceurls", {}).get("AI_API_URL", "")
|
| 150 |
+
or creds.get("AICORE_BASE_URL", "")
|
| 151 |
+
)
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"⚠️ Could not read GenAI proxy credentials: {e}")
|
| 154 |
+
|
| 155 |
+
if not base_url:
|
| 156 |
+
print("⚠️ Missing AI_API_URL or base_url in credentials — skipping fallback.")
|
| 157 |
+
return []
|
| 158 |
+
|
| 159 |
try:
|
| 160 |
print(f"⚙️ Invoking GenAI proxy for TOC inference using model: {model_name}")
|
| 161 |
+
proxy_client = get_proxy_client("gen-ai-hub", base_url=base_url)
|
| 162 |
+
|
| 163 |
llm = ChatOpenAI(
|
| 164 |
proxy_model_name=model_name,
|
| 165 |
proxy_client=proxy_client,
|
|
|
|
| 177 |
"""
|
| 178 |
|
| 179 |
response = llm.invoke(prompt)
|
| 180 |
+
response_text = getattr(response, "content", str(response))
|
| 181 |
|
| 182 |
# Extract clean TOC-like lines
|
| 183 |
lines = [
|
|
|
|
| 194 |
print(f"⚠️ AI TOC fallback failed via GenAI proxy: {e}")
|
| 195 |
return []
|
| 196 |
|
| 197 |
+
|
| 198 |
# ==========================================================
|
| 199 |
# 3B️⃣ UNIFIED WRAPPER (Heuristic + AI Hybrid)
|
| 200 |
# ==========================================================
|