Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| from langchain_groq import ChatGroq | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_chroma import Chroma | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from dotenv import load_dotenv | |
# Load variables from a local .env file (if one exists) into the process
# environment before any code below reads configuration from it.
load_dotenv()
# Directory passed to Chroma as `persist_directory` by the functions in this file.
DbPath = "./chroma_db"
def GetRagResponse(UserQuery):
    """Answer a question from context retrieved out of the Chroma store.

    The query is used to retrieve up to 5 chunks from the Chroma database at
    ``DbPath``; their text is joined and sent, together with the question, to
    the Groq chat model with an instruction to answer only from that context.

    Args:
        UserQuery: Question text. Used both as the retrieval query and as the
            question placed in the prompt.

    Returns:
        dict: ``{"answer": <model response content>, "sources": [...]}`` where
        each source is ``{"source": <filename>, "page": <page as str>}``,
        de-duplicated per (filename, page) in retrieval order. On any failure
        the function does not raise; it returns
        ``{"answer": "System Error: <message>", "sources": []}``.
    """
    try:
        # SECURITY: the Groq key must come from the environment (.env file or
        # deployment secret), never from source. Any key that was previously
        # committed in this file should be treated as leaked and rotated.
        ApiKey = os.getenv("GROQ_API_KEY")
        if not ApiKey:
            raise RuntimeError("GROQ_API_KEY environment variable is not set")

        # 1. SETUP DATABASE
        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)

        # 2. RETRIEVE
        Retriever = VectorDb.as_retriever(search_kwargs={"k": 5})
        Docs = Retriever.invoke(UserQuery)
        ContextText = "\n\n".join(d.page_content for d in Docs)

        # 3. LLM
        Llm = ChatGroq(
            model="llama-3.3-70b-versatile",
            temperature=0,
            api_key=ApiKey,
        )

        # 4. CHAIN
        SystemPrompt = "Answer based ONLY on context. If unsure, say unknown."
        PromptTemplate = ChatPromptTemplate.from_messages([
            ("system", SystemPrompt),
            ("human", "Context:\n{context}\n\nQuestion:\n{question}"),
        ])
        Chain = PromptTemplate | Llm
        Response = Chain.invoke({"context": ContextText, "question": UserQuery})

        # 5. FORMAT SOURCES
        FormattedSources = []
        Seen = set()
        for d in Docs:
            # Missing metadata falls back to placeholders instead of failing.
            Page = d.metadata.get("page", "?")
            Filename = d.metadata.get("filename", "Unknown")
            # A tuple key cannot collide the way "name-page" strings can when
            # the filename itself contains a hyphen.
            Key = (Filename, str(Page))
            if Key not in Seen:
                Seen.add(Key)
                FormattedSources.append({"source": Filename, "page": str(Page)})

        return {"answer": Response.content, "sources": FormattedSources}
    except Exception as e:
        # Top-level boundary: callers always get the same envelope shape.
        print(f"CHAT ERROR: {e}")
        return {"answer": f"System Error: {str(e)}", "sources": []}
def ExtractStructure(Requirement):
    """Extract a structured table (as a JSON array) for ``Requirement``.

    Retrieves up to 15 chunks from the Chroma database at ``DbPath`` using
    ``Requirement`` as the query, asks the Groq chat model to return the
    matching table as a JSON array, and parses that array out of the reply.

    Args:
        Requirement: Name of the item to extract; it is used as the retrieval
            query and interpolated into the extraction prompt.

    Returns:
        dict: ``{"data": [...], "sources": [...]}``. ``data`` is always a list
        (empty when the reply cannot be parsed); each source is
        ``{"source": <filename>, "page": <page as str>}``, de-duplicated per
        (filename, page) in retrieval order. On any other failure the function
        does not raise; it returns ``{"data": [], "sources": []}``.
    """
    try:
        # SECURITY: the Groq key must come from the environment (.env file or
        # deployment secret), never from source. Any key that was previously
        # committed in this file should be treated as leaked and rotated.
        ApiKey = os.getenv("GROQ_API_KEY")
        if not ApiKey:
            raise RuntimeError("GROQ_API_KEY environment variable is not set")

        # 1. SETUP
        EmbedModel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        VectorDb = Chroma(persist_directory=DbPath, embedding_function=EmbedModel)
        # INCREASE K to find the table spread across pages
        Retriever = VectorDb.as_retriever(search_kwargs={"k": 15})
        Docs = Retriever.invoke(Requirement)
        ContextText = "\n\n".join(d.page_content for d in Docs)

        # 2. LLM
        Llm = ChatGroq(
            model="llama-3.3-70b-versatile",
            temperature=0,
            api_key=ApiKey,
        )

        # 3. PROMPT
        Prompt = f"""
        Extract the "{Requirement}" from the text.
        Look for a table with columns like: Door #, Wall Type, Frame Type, Door Type, Height, Width, Notes.
        Return ONLY valid JSON.
        Start the response with [ and end with ].
        Do NOT write "Here is the JSON".
        Use this Schema:
        [
        {{
        "mark": "Door Number (e.g. 1, 2, D-101)",
        "frame_type": "Material (e.g. Hollow Metal, Aluminum)",
        "door_type": "Type (e.g. Single, Double Egress)",
        "size": "Height/Width info",
        "notes": "Any notes (e.g. AE601 TYP)"
        }}
        ]
        TEXT:
        {ContextText}
        """
        Response = Llm.invoke(Prompt)
        RawContent = Response.content

        # 4. ROBUST JSON PARSING
        # The model may wrap the array in prose, so parse the outermost
        # [...] span when there is one and the whole reply otherwise.
        try:
            Start = RawContent.find("[")
            End = RawContent.rfind("]") + 1
            if 0 <= Start < End:
                Data = json.loads(RawContent[Start:End])
            else:
                Data = json.loads(RawContent)
        except (ValueError, TypeError, AttributeError):
            # ValueError covers json.JSONDecodeError; TypeError/AttributeError
            # cover a reply whose content is not a plain string. Interrupts
            # and SystemExit are deliberately no longer swallowed here.
            print(f"JSON PARSE FAIL: {RawContent}")
            # If JSON fails, return empty list so UI doesn't crash
            Data = []

        # Keep the "data" contract a list even if the model returned a single
        # object or a bare scalar instead of an array.
        if isinstance(Data, dict):
            Data = [Data]
        elif not isinstance(Data, list):
            Data = []

        # 5. FORMAT SOURCES
        FormattedSources = []
        Seen = set()
        for d in Docs:
            # Missing metadata falls back to placeholders instead of failing.
            Page = d.metadata.get("page", "?")
            Filename = d.metadata.get("filename", "Unknown")
            # A tuple key cannot collide the way "name-page" strings can when
            # the filename itself contains a hyphen.
            Key = (Filename, str(Page))
            if Key not in Seen:
                Seen.add(Key)
                FormattedSources.append({"source": Filename, "page": str(Page)})

        return {"data": Data, "sources": FormattedSources}
    except Exception as e:
        # Top-level boundary: callers always get the same envelope shape.
        print(f"EXTRACTION ERROR: {e}")
        return {"data": [], "sources": []}