Spaces:

Shubham170793
/

enterprise-knowledge-assistant

Sleeping

App Files Files Community

Shubham170793 committed on Oct 19

Commit

52aa0b1

verified ·

1 Parent(s): 8afec0a

Update src/qa.py

Browse files

Files changed (1) hide show

src/qa.py +56 -30

src/qa.py CHANGED Viewed

@@ -48,34 +48,50 @@ except Exception as e:
     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 # ==========================================================
-# 3️⃣ GPT-4o via SAP Gen AI Hub
 # ==========================================================
-print("✅ Loading GPT-4o via SAP Gen AI Hub...")
 CRED_PATH = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
-try:
-    with open(CRED_PATH, "r") as key_file:
-        svcKey = json.load(key_file)
-    os.environ.update({
-        "AICORE_AUTH_URL": svcKey["url"],
-        "AICORE_CLIENT_ID": svcKey["clientid"],
-        "AICORE_CLIENT_SECRET": svcKey["clientsecret"],
-        "AICORE_RESOURCE_GROUP": "default",
-        "AICORE_BASE_URL": svcKey["serviceurls"]["AI_API_URL"]
-    })
-    proxy_client = get_proxy_client("gen-ai-hub")
-    chat_llm = ChatOpenAI(
-        proxy_model_name="gpt-4o",
-        proxy_client=proxy_client,
-        temperature=0.3,
-        max_tokens=1500
-    )
-    print("✅ GPT-4o (via Gen AI Hub) ready for generation.")
-except Exception as e:
-    print(f"⚠️ Gen AI Hub setup failed: {e}")
-    chat_llm = None
 # ==========================================================
 # 4️⃣ Embedding Generator (batch-optimized)
@@ -234,14 +250,19 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
         return []
 # ==========================================================
-# 8️⃣ Answer Generation
 # ==========================================================
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
-    if chat_llm is None:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
     prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
@@ -256,18 +277,23 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
         {"role": "user", "content": prompt},
     ]
     try:
-        response = chat_llm.invoke(messages)
         return response.content.strip()
     except Exception as e:
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper (for AI suggestions)
 # ==========================================================
 def genai_generate(prompt: str) -> str:
-    if chat_llm is None:
         raise RuntimeError("⚠️ GPT-4o not initialized. Check credentials or rebuild the Space.")
     messages = [
@@ -276,7 +302,7 @@ def genai_generate(prompt: str) -> str:
     ]
     try:
-        response = chat_llm.invoke(messages)
         return response.content.strip()
     except Exception as e:
         print(f"⚠️ genai_generate() failed: {e}")

     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 # ==========================================================
+# 3️⃣ GPT-4o via SAP Gen AI Hub — Lazy / On-demand initialization
 # ==========================================================
+from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
+from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
 CRED_PATH = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
+_chat_llm = None  # cached instance
+def get_chat_llm(model_name: str = "gpt-4o", temperature: float = 0.3, max_tokens: int = 1500):
+    """
+    Lazily initializes ChatOpenAI via Gen AI Hub proxy.
+    Only runs when first needed; cached afterward.
+    """
+    global _chat_llm
+    if _chat_llm is not None:
+        return _chat_llm
+    try:
+        # Optional: set environment variables from service key if present
+        if os.path.exists(CRED_PATH):
+            with open(CRED_PATH, "r") as key_file:
+                svcKey = json.load(key_file)
+            os.environ.update({
+                "AICORE_AUTH_URL": svcKey.get("url", ""),
+                "AICORE_CLIENT_ID": svcKey.get("clientid", ""),
+                "AICORE_CLIENT_SECRET": svcKey.get("clientsecret", ""),
+                "AICORE_BASE_URL": svcKey.get("serviceurls", {}).get("AI_API_URL", ""),
+            })
+        proxy_client = get_proxy_client("gen-ai-hub")
+        _chat_llm = ChatOpenAI(
+            proxy_model_name=model_name,
+            proxy_client=proxy_client,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )
+        print(f"✅ GPT-4o (via Gen AI Hub) initialized lazily for model: {model_name}")
+        return _chat_llm
+    except Exception as e:
+        print(f"⚠️ Gen AI Hub lazy init failed: {e}")
+        _chat_llm = None
+        raise
 # ==========================================================
 # 4️⃣ Embedding Generator (batch-optimized)
         return []
 # ==========================================================
+# 8️⃣ Answer Generation (Lazy GPT-4o Initialization)
 # ==========================================================
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
+    # Try lazy initialization
+    try:
+        chat_llm_local = get_chat_llm()
+    except Exception:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
+    # Build context and prompt
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
     prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
         {"role": "user", "content": prompt},
     ]
+    # Invoke GPT-4o
     try:
+        response = chat_llm_local.invoke(messages)
         return response.content.strip()
     except Exception as e:
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper (for AI suggestions)
 # ==========================================================
 def genai_generate(prompt: str) -> str:
+    # Try lazy initialization
+    try:
+        chat_llm_local = get_chat_llm()
+    except Exception:
         raise RuntimeError("⚠️ GPT-4o not initialized. Check credentials or rebuild the Space.")
     messages = [
     ]
     try:
+        response = chat_llm_local.invoke(messages)
         return response.content.strip()
     except Exception as e:
         print(f"⚠️ genai_generate() failed: {e}")