Spaces:

Bhanushray
/

project-brain-backend

Sleeping

App Files Community

Bhanushray committed on Dec 1, 2025

Commit

90af697

verified ·

1 Parent(s): 6d2565e

Update backend/rag_engine.py

Browse files

Files changed (1) hide show

backend/rag_engine.py +140 -140

backend/rag_engine.py CHANGED Viewed

@@ -1,141 +1,141 @@
-import json
-import os
-from langchain_groq import ChatGroq
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_chroma import Chroma
-from langchain_core.prompts import ChatPromptTemplate
-from dotenv import load_dotenv
-# LOAD ENV
-load_dotenv()
-DbPath = "./chroma_db"
-def GetRagResponse(UserQuery):
-    """Answer UserQuery using documents retrieved from the Chroma store.
-
-    Args:
-        UserQuery: Question text; used as the retrieval query and inserted
-            into the prompt as the question.
-
-    Returns:
-        dict with "answer" (the model response content) and "sources"
-        (a list of {"source": filename, "page": str} entries, one per
-        distinct filename/page pair among the retrieved documents).
-        If anything raises, the error is printed and the returned dict
-        carries a "System Error: ..." answer with an empty sources list.
-    """
-    try:
-        # 1. SETUP DATABASE
-        # The embedding model and vector store are rebuilt on every call.
-        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)
-        # 2. RETRIEVE
-        # k=5: ask the retriever for at most five documents.
-        Retriever = VectorDb.as_retriever(search_kwargs={"k": 5})
-        Docs = Retriever.invoke(UserQuery)
-        # Retrieved page contents are joined with blank lines to form the context.
-        ContextText = "\n\n".join([d.page_content for d in Docs])
-        # 3. LLM
-        # The API key is read from the GROQ_API_KEY environment variable.
-        Llm = ChatGroq(
-            model="llama-3.3-70b-versatile",
-            temperature=0,
-            api_key=os.getenv("GROQ_API_KEY")
-        )
-        # 4. CHAIN
-        SystemPrompt = "Answer based ONLY on context. If unsure, say unknown."
-        PromptTemplate = ChatPromptTemplate.from_messages([
-            ("system", SystemPrompt),
-            ("human", "Context:\n{context}\n\nQuestion:\n{question}")
-        ])
-        # Pipe the filled-in prompt into the model.
-        Chain = PromptTemplate | Llm
-        Response = Chain.invoke({"context": ContextText, "question": UserQuery})
-        # 5. FORMAT SOURCES
-        FormattedSources = []
-        # Seen holds "filename-page" keys so each pair is reported only once.
-        Seen = set()
-        for d in Docs:
-            # Safely get page number, default to '?' if missing
-            Page = d.metadata.get("page", "?")
-            Filename = d.metadata.get("filename", "Unknown")
-            Key = f"{Filename}-{Page}"
-            if Key not in Seen:
-                FormattedSources.append({
-                    "source": Filename,
-                    "page": str(Page)
-                })
-                Seen.add(Key)
-        return {"answer": Response.content, "sources": FormattedSources}
-    except Exception as e:
-        # Any failure is reported in-band; this function does not raise.
-        print(f"CHAT ERROR: {e}")
-        return {"answer": f"System Error: {str(e)}", "sources": []}
-def ExtractStructure(Requirement):
-    """Extract a table-like structure for Requirement as a list of JSON rows.
-
-    Retrieves documents from the Chroma store, asks the LLM to return a
-    JSON array following the door-schedule schema embedded in the prompt
-    (mark, frame_type, door_type, size, notes), and parses the reply.
-
-    Args:
-        Requirement: Text naming what to extract; used as the retrieval
-            query and interpolated into the prompt.
-
-    Returns:
-        dict with "data" (the parsed JSON, or [] when parsing fails) and
-        "sources" (a list of {"source": filename, "page": str} entries,
-        one per distinct filename/page pair). If anything outside the
-        JSON parsing raises, the error is printed and both are empty lists.
-    """
-    try:
-        # 1. SETUP
-        # The embedding model and vector store are rebuilt on every call.
-        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)
-        # INCREASE K to find the table spread across pages
-        Retriever = VectorDb.as_retriever(search_kwargs={"k": 15})
-        Docs = Retriever.invoke(Requirement)
-        ContextText = "\n\n".join([d.page_content for d in Docs])
-        # 2. LLM
-        # The API key is read from the GROQ_API_KEY environment variable.
-        Llm = ChatGroq(
-            model="llama-3.3-70b-versatile",
-            temperature=0,
-            api_key=os.getenv("GROQ_API_KEY")
-        )
-        # 3. PROMPT
-        # Doubled braces ({{ }}) are literal braces inside the f-string.
-        Prompt = f"""
-        Extract the "{Requirement}" from the text.
-        Look for a table with columns like: Door #, Wall Type, Frame Type, Door Type, Height, Width, Notes.
-        Return ONLY valid JSON.
-        Start the response with [ and end with ].
-        Do NOT write "Here is the JSON".
-        Use this Schema:
-        [
-            {{
-                "mark": "Door Number (e.g. 1, 2, D-101)",
-                "frame_type": "Material (e.g. Hollow Metal, Aluminum)",
-                "door_type": "Type (e.g. Single, Double Egress)",
-                "size": "Height/Width info",
-                "notes": "Any notes (e.g. AE601 TYP)"
-            }}
-        ]
-        TEXT:
-        {ContextText}
-        """
-        Response = Llm.invoke(Prompt)
-        RawContent = Response.content
-        # 4. ROBUST JSON PARSING
-        # Slice from the first '[' to the last ']' so any text the model
-        # wrote around the array is discarded before parsing.
-        try:
-            Start = RawContent.find('[')
-            End = RawContent.rfind(']') + 1
-            # End == 0 means rfind returned -1, i.e. no ']' was found.
-            if Start != -1 and End != 0:
-                JsonStr = RawContent[Start:End]
-                Data = json.loads(JsonStr)
-            else:
-                Data = json.loads(RawContent)
-        # NOTE(review): a bare except also traps KeyboardInterrupt and SystemExit.
-        except:
-            print(f"JSON PARSE FAIL: {RawContent}")
-            # If JSON fails, return empty list so UI doesn't crash
-            Data = []
-        # 5. FORMAT SOURCES
-        FormattedSources = []
-        # Seen holds "filename-page" keys so each pair is reported only once.
-        Seen = set()
-        for d in Docs:
-            Page = d.metadata.get("page", "?")
-            Filename = d.metadata.get("filename", "Unknown")
-            Key = f"{Filename}-{Page}"
-            if Key not in Seen:
-                FormattedSources.append({
-                    "source": Filename,
-                    "page": str(Page)
-                })
-                Seen.add(Key)
-        return {"data": Data, "sources": FormattedSources}
-    except Exception as e:
-        # Any other failure is printed and reported as empty results.
-        print(f"EXTRACTION ERROR: {e}")
         return {"data": [], "sources": []}

+import json
+import os
+from langchain_groq import ChatGroq
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain_core.prompts import ChatPromptTemplate
+from dotenv import load_dotenv
+# LOAD ENV
+load_dotenv()
+DbPath = "./chroma_db"
+def GetRagResponse(UserQuery):
+    """Answer UserQuery using documents retrieved from the Chroma store.
+
+    Args:
+        UserQuery: Question text; used as the retrieval query and inserted
+            into the prompt as the question.
+
+    Returns:
+        dict with "answer" (the model response content) and "sources"
+        (a list of {"source": filename, "page": str} entries, one per
+        distinct filename/page pair among the retrieved documents).
+        If anything raises, the error is printed and the returned dict
+        carries a "System Error: ..." answer with an empty sources list.
+    """
+    try:
+        # 1. SETUP DATABASE
+        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)
+        # 2. RETRIEVE
+        # k=5: ask the retriever for at most five documents.
+        Retriever = VectorDb.as_retriever(search_kwargs={"k": 5})
+        Docs = Retriever.invoke(UserQuery)
+        ContextText = "\n\n".join(d.page_content for d in Docs)
+        # 3. LLM
+        # SECURITY: the key is read from the environment (load_dotenv() runs
+        # at import time) and must never be hard-coded in source control.
+        # Any key that was ever committed to this file has to be treated as
+        # compromised and revoked with the provider.
+        Llm = ChatGroq(
+            model="llama-3.3-70b-versatile",
+            temperature=0,
+            api_key=os.getenv("GROQ_API_KEY"),
+        )
+        # 4. CHAIN
+        SystemPrompt = "Answer based ONLY on context. If unsure, say unknown."
+        PromptTemplate = ChatPromptTemplate.from_messages([
+            ("system", SystemPrompt),
+            ("human", "Context:\n{context}\n\nQuestion:\n{question}")
+        ])
+        Chain = PromptTemplate | Llm
+        Response = Chain.invoke({"context": ContextText, "question": UserQuery})
+        # 5. FORMAT SOURCES
+        FormattedSources = []
+        Seen = set()
+        for d in Docs:
+            # Safely get page number, default to '?' if missing
+            Page = str(d.metadata.get("page", "?"))
+            Filename = d.metadata.get("filename", "Unknown")
+            # A tuple key cannot collide the way a "-"-joined string can
+            # when the filename itself contains a dash.
+            Key = (Filename, Page)
+            if Key in Seen:
+                continue
+            Seen.add(Key)
+            FormattedSources.append({"source": Filename, "page": Page})
+        return {"answer": Response.content, "sources": FormattedSources}
+    except Exception as e:
+        # Any failure is reported in-band; this function does not raise.
+        print(f"CHAT ERROR: {e}")
+        return {"answer": f"System Error: {str(e)}", "sources": []}
+def ExtractStructure(Requirement):
+    """Extract a table-like structure for Requirement as a list of JSON rows.
+
+    Retrieves documents from the Chroma store, asks the LLM to return a
+    JSON array following the door-schedule schema embedded in the prompt
+    (mark, frame_type, door_type, size, notes), and parses the reply.
+
+    Args:
+        Requirement: Text naming what to extract; used as the retrieval
+            query and interpolated into the prompt.
+
+    Returns:
+        dict with "data" (the parsed JSON, or [] when parsing fails) and
+        "sources" (a list of {"source": filename, "page": str} entries,
+        one per distinct filename/page pair). If anything outside the
+        JSON parsing raises, the error is printed and both are empty lists.
+    """
+    try:
+        # 1. SETUP
+        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)
+        # INCREASE K to find the table spread across pages
+        Retriever = VectorDb.as_retriever(search_kwargs={"k": 15})
+        Docs = Retriever.invoke(Requirement)
+        ContextText = "\n\n".join(d.page_content for d in Docs)
+        # 2. LLM
+        # The API key comes from the environment; never hard-code it here.
+        Llm = ChatGroq(
+            model="llama-3.3-70b-versatile",
+            temperature=0,
+            api_key=os.getenv("GROQ_API_KEY"),
+        )
+        # 3. PROMPT
+        # Doubled braces ({{ }}) are literal braces inside the f-string.
+        Prompt = f"""
+        Extract the "{Requirement}" from the text.
+        Look for a table with columns like: Door #, Wall Type, Frame Type, Door Type, Height, Width, Notes.
+        Return ONLY valid JSON.
+        Start the response with [ and end with ].
+        Do NOT write "Here is the JSON".
+        Use this Schema:
+        [
+            {{
+                "mark": "Door Number (e.g. 1, 2, D-101)",
+                "frame_type": "Material (e.g. Hollow Metal, Aluminum)",
+                "door_type": "Type (e.g. Single, Double Egress)",
+                "size": "Height/Width info",
+                "notes": "Any notes (e.g. AE601 TYP)"
+            }}
+        ]
+        TEXT:
+        {ContextText}
+        """
+        Response = Llm.invoke(Prompt)
+        RawContent = Response.content
+        # 4. ROBUST JSON PARSING
+        # Slice from the first '[' to the last ']' so any text the model
+        # wrote around the array is discarded; otherwise parse the raw reply.
+        try:
+            Start = RawContent.find("[")
+            End = RawContent.rfind("]")
+            if Start != -1 and End > Start:
+                JsonStr = RawContent[Start:End + 1]
+            else:
+                JsonStr = RawContent
+            Data = json.loads(JsonStr)
+        except (ValueError, TypeError, AttributeError):
+            # ValueError covers json.JSONDecodeError; TypeError/AttributeError
+            # cover non-string content. Unlike a bare except, this lets
+            # KeyboardInterrupt and SystemExit propagate.
+            print(f"JSON PARSE FAIL: {RawContent}")
+            # If JSON fails, return empty list so UI doesn't crash
+            Data = []
+        # 5. FORMAT SOURCES
+        FormattedSources = []
+        Seen = set()
+        for d in Docs:
+            Page = str(d.metadata.get("page", "?"))
+            Filename = d.metadata.get("filename", "Unknown")
+            # A tuple key cannot collide the way a "-"-joined string can
+            # when the filename itself contains a dash.
+            Key = (Filename, Page)
+            if Key in Seen:
+                continue
+            Seen.add(Key)
+            FormattedSources.append({"source": Filename, "page": Page})
+        return {"data": Data, "sources": FormattedSources}
+    except Exception as e:
+        # Any other failure is printed and reported as empty results.
+        print(f"EXTRACTION ERROR: {e}")
         return {"data": [], "sources": []}