File size: 4,726 Bytes
90af697
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e2ce37
90af697
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cef940
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

import json
import os
from langchain_groq import ChatGroq 
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment before anything below reads configuration.
load_dotenv()

# Directory handed to Chroma as `persist_directory` by the functions in this
# module. It is a relative path, so it resolves against the current working
# directory of the process.
DbPath = "./chroma_db"

def GetRagResponse(UserQuery):
    """Answer a question from documents retrieved out of the Chroma store.

    The five chunks most similar to ``UserQuery`` are retrieved, joined into
    one context string, and sent to the Groq chat model together with the
    question.

    Args:
        UserQuery: The question text. It is used both as the retrieval query
            and as the question placed in the prompt.

    Returns:
        A dict with two keys:

        * ``"answer"``: the model's reply text, or ``"System Error: ..."``
          when any step failed.
        * ``"sources"``: a list of ``{"source": filename, "page": page}``
          dicts, one per distinct (filename, page) pair among the retrieved
          chunks, in retrieval order. Empty when a step failed.

    Raises:
        Nothing derived from ``Exception``: such errors are printed and
        reported through the returned ``"answer"`` string instead.
    """
    try:
        # 1. CREDENTIALS
        # The Groq key comes from the environment (filled from .env by the
        # module-level load_dotenv() call) rather than from source code.
        # SECURITY: a key used to be hard-coded here; it must be treated as
        # leaked and revoked in the Groq console.
        ApiKey = os.environ.get("GROQ_API_KEY")
        if not ApiKey:
            # Fail before loading the embedding model or touching the store.
            raise RuntimeError("GROQ_API_KEY environment variable is not set")

        # 2. SETUP DATABASE
        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)

        # 3. RETRIEVE
        Retriever = VectorDb.as_retriever(search_kwargs={"k": 5})
        Docs = Retriever.invoke(UserQuery)
        ContextText = "\n\n".join(d.page_content for d in Docs)

        # 4. LLM
        Llm = ChatGroq(
            model="llama-3.3-70b-versatile",
            temperature=0,
            api_key=ApiKey,
        )

        # 5. CHAIN
        SystemPrompt = "Answer based ONLY on context. If unsure, say unknown."
        PromptTemplate = ChatPromptTemplate.from_messages([
            ("system", SystemPrompt),
            ("human", "Context:\n{context}\n\nQuestion:\n{question}")
        ])
        Chain = PromptTemplate | Llm
        Response = Chain.invoke({"context": ContextText, "question": UserQuery})

        # 6. FORMAT SOURCES
        FormattedSources = []
        Seen = set()
        for d in Docs:
            # Safely get page number, default to '?' if missing
            Page = d.metadata.get("page", "?")
            Filename = d.metadata.get("filename", "Unknown")
            # A tuple key cannot collide the way a "-"-joined string can
            # (e.g. "a-1" + "2" versus "a" + "1-2").
            Key = (str(Filename), str(Page))

            if Key not in Seen:
                FormattedSources.append({
                    "source": Filename,
                    "page": str(Page)
                })
                Seen.add(Key)

        return {"answer": Response.content, "sources": FormattedSources}

    except Exception as e:
        # Top-level boundary: report the failure instead of raising to the caller.
        print(f"CHAT ERROR: {e}")
        return {"answer": f"System Error: {str(e)}", "sources": []}

def _ParseJsonRows(RawContent):
    """Turn the model's reply into a list of rows; return [] if unparseable."""
    if not isinstance(RawContent, str):
        print(f"JSON PARSE FAIL: {RawContent}")
        return []

    # Prefer the outermost [...] span so any chatter around the array is ignored.
    Start = RawContent.find('[')
    End = RawContent.rfind(']') + 1
    Candidate = RawContent[Start:End] if Start != -1 and End > Start else RawContent

    try:
        Parsed = json.loads(Candidate)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        print(f"JSON PARSE FAIL: {RawContent}")
        # If JSON fails, return empty list so UI doesn't crash
        return []

    if isinstance(Parsed, list):
        return Parsed
    if isinstance(Parsed, dict):
        # A lone object is treated as a one-row table.
        return [Parsed]
    # Scalars (null, numbers, strings) are not usable rows.
    print(f"JSON PARSE FAIL: {RawContent}")
    return []


def ExtractStructure(Requirement):
    """Extract a table named by ``Requirement`` from stored documents as JSON rows.

    The fifteen chunks most similar to ``Requirement`` are retrieved, joined
    into one text, and sent to the Groq chat model with a prompt asking for a
    JSON array of door-schedule style rows.

    Args:
        Requirement: Name of the thing to extract. It is used both as the
            retrieval query and inside the prompt text.

    Returns:
        A dict with two keys:

        * ``"data"``: the parsed list of rows, or ``[]`` when the reply could
          not be parsed as JSON.
        * ``"sources"``: a list of ``{"source": filename, "page": page}``
          dicts, one per distinct (filename, page) pair among the retrieved
          chunks, in retrieval order.

        Both lists are empty when any other step failed.

    Raises:
        Nothing derived from ``Exception``: such errors are printed and an
        empty result is returned instead.
    """
    try:
        # 1. CREDENTIALS
        # The Groq key comes from the environment (filled from .env by the
        # module-level load_dotenv() call) rather than from source code.
        # SECURITY: a key used to be hard-coded here; it must be treated as
        # leaked and revoked in the Groq console.
        ApiKey = os.environ.get("GROQ_API_KEY")
        if not ApiKey:
            # Fail before loading the embedding model or touching the store.
            raise RuntimeError("GROQ_API_KEY environment variable is not set")

        # 2. SETUP
        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)
        # INCREASE K to find the table spread across pages
        Retriever = VectorDb.as_retriever(search_kwargs={"k": 15})
        Docs = Retriever.invoke(Requirement)
        ContextText = "\n\n".join(d.page_content for d in Docs)

        # 3. LLM
        Llm = ChatGroq(
            model="llama-3.3-70b-versatile",
            temperature=0,
            api_key=ApiKey,
        )

        # 4. PROMPT
        Prompt = f"""
        Extract the "{Requirement}" from the text.
        
        Look for a table with columns like: Door #, Wall Type, Frame Type, Door Type, Height, Width, Notes.
        
        Return ONLY valid JSON.
        Start the response with [ and end with ].
        Do NOT write "Here is the JSON".
        
        Use this Schema: 
        [
            {{
                "mark": "Door Number (e.g. 1, 2, D-101)",
                "frame_type": "Material (e.g. Hollow Metal, Aluminum)",
                "door_type": "Type (e.g. Single, Double Egress)",
                "size": "Height/Width info",
                "notes": "Any notes (e.g. AE601 TYP)"
            }}
        ]

        TEXT:
        {ContextText}
        """

        Response = Llm.invoke(Prompt)

        # 5. ROBUST JSON PARSING
        Data = _ParseJsonRows(Response.content)

        # 6. FORMAT SOURCES
        FormattedSources = []
        Seen = set()
        for d in Docs:
            Page = d.metadata.get("page", "?")
            Filename = d.metadata.get("filename", "Unknown")
            # A tuple key cannot collide the way a "-"-joined string can
            # (e.g. "a-1" + "2" versus "a" + "1-2").
            Key = (str(Filename), str(Page))
            if Key not in Seen:
                FormattedSources.append({
                    "source": Filename,
                    "page": str(Page)
                })
                Seen.add(Key)

        return {"data": Data, "sources": FormattedSources}

    except Exception as e:
        # Top-level boundary: report the failure instead of raising to the caller.
        print(f"EXTRACTION ERROR: {e}")
        return {"data": [], "sources": []}