Spaces:
Sleeping
Sleeping
Upload 13 files
Browse files- .gitattributes +1 -0
- LiteratureAgent.py +139 -0
- Refiner.py +60 -0
- chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/data_level0.bin +3 -0
- chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/header.bin +3 -0
- chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/index_metadata.pickle +3 -0
- chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/length.bin +3 -0
- chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/link_lists.bin +3 -0
- chroma_langchain_db/chroma.sqlite3 +3 -0
- config.py +56 -5
- helpers.py +2 -1
- main.py +64 -107
- refine.pdf +0 -0
- requirements.txt +2 -1
.gitattributes
CHANGED
|
@@ -78,3 +78,4 @@ Articles/wind_loads_on_discontinuous_metal_roofing_ibhs.pdf filter=lfs diff=lfs
|
|
| 78 |
Articles/wind_uplift_resistance_of_artificially_and_naturally_aged_asphalt_shingles.pdf filter=lfs diff=lfs merge=lfs -text
|
| 79 |
Articles/wind_vulnerability_analysis_of_standing_seam_roof_system_considering_fatigue_damage.pdf filter=lfs diff=lfs merge=lfs -text
|
| 80 |
chroma_store/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 78 |
Articles/wind_uplift_resistance_of_artificially_and_naturally_aged_asphalt_shingles.pdf filter=lfs diff=lfs merge=lfs -text
|
| 79 |
Articles/wind_vulnerability_analysis_of_standing_seam_roof_system_considering_fatigue_damage.pdf filter=lfs diff=lfs merge=lfs -text
|
| 80 |
chroma_store/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
chroma_langchain_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
LiteratureAgent.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_openai import OpenAIEmbeddings
|
| 2 |
+
from langchain_openai import ChatOpenAI
|
| 3 |
+
from langchain_chroma import Chroma
|
| 4 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 5 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 6 |
+
from langchain_core.output_parsers import XMLOutputParser
|
| 7 |
+
from langchain.chains import create_retrieval_chain
|
| 8 |
+
from langchain_core.documents import Document
|
| 9 |
+
from typing import List
|
| 10 |
+
|
| 11 |
+
|
# System prompt for the literature RAG chain. "{context}" is filled with
# XML-formatted article snippets (see RoofCoverChatbot.format_docs_xml).
# NOTE: the original text duplicated the entire intro paragraph ("...answer the
# user question. You're a helpful AI assistant..."); collapsed to one intro,
# matching the equivalent fix already applied to config.py in this commit.
XML_SYSTEM_PROMPT = """You're a helpful AI assistant. Given a user question and some scientific literature
documents which highlight research on different roof cover materials (e.g., asphalt shingles, metal, tile)
and their performance against natural hazards(e.g., wind, hail), provide clear, concise, and informed answers without unnecessary fluff.

When addressing questions about ‘what is the best roof,’ consider the following factors:
• Geography, aesthetic preferences, budget, frequency of weather-related hazards, roof cover performance, and how performance changes with age.
• For the insurance industry, the ‘best roof’ depends on the specific hazards (their location and frequency), performance expectations and predictability, and the cost of materials.

If none of the articles answer the question, simply say that there are no articles relevant to your inquiry.
Remember, you must return both an answer and citations. A citation consists of a VERBATIM quote that
justifies the answer and the ID and also Source Name of the quote article. Return a citation for every quote across all articles
that justify the answer. Use the following format for your final output:
<cited_answer>
<answer></answer>
<citations>
<citation><source_id></source_id><source></source><quote></quote></citation>
<citation><source_id></source_id><source></source><quote></quote></citation>
...
</citations>
</cited_answer>

If none of the articles answer the question, return:
<cited_answer>
<answer>Nothing</answer>
<citations/>
</cited_answer>

ALWAYS maintain valid XML structure with properly closed tags. Here are the articles:{context}"""
class RoofCoverChatbot:
    """RAG chatbot over a persisted Chroma store of roof-cover literature.

    Retrieves the top-K most similar article chunks for a query and asks the
    LLM to produce an XML answer with verbatim, per-source citations (the
    format is dictated by XML_SYSTEM_PROMPT).
    """

    def __init__(self, model: str = "gpt-4o", temperature: float = 0.1):
        """
        Initialize the RoofCoverChatbot by setting up the retrieval chain,
        which uses scientific literature documents to generate an XML-formatted answer.

        :param model: OpenAI chat model name.
        :param temperature: Sampling temperature; kept low for factual answers.
        """
        # Create the XML prompt template.
        self.xml_prompt = ChatPromptTemplate.from_messages(
            [("system", XML_SYSTEM_PROMPT), ("human", "{input}")]
        )

        # Initialize the language model.
        self.llm = ChatOpenAI(model=model, temperature=temperature)

        # Chain that answers from retrieved documents.
        # The first step renders the retrieved Documents as XML context.
        rag_chain_from_docs = (
            RunnablePassthrough.assign(
                context=(lambda x: self.format_docs_xml(x["context"]))
            )
            | self.xml_prompt
            | self.llm
            | XMLOutputParser()
        )

        embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

        # Persisted store; must have been built with the same embedding model.
        self.vectordb = Chroma(
            collection_name="roofs_collection",
            embedding_function=embeddings,
            persist_directory="./chroma_langchain_db",
        )
        # Use similarity search to retrieve the top-K documents.
        self.retriever = self.vectordb.as_retriever(
            search_type="similarity", search_kwargs={"k": 5}
        )

        # This lambda extracts the "input" key for retrieval.
        retrieve_docs = (lambda x: x["input"]) | self.retriever

        # Final chain: retrieve documents, then generate the cited answer.
        # Output dict carries both "context" (Documents) and "answer" (parsed XML).
        self.chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
            answer=rag_chain_from_docs
        )

    @staticmethod
    def format_docs_xml(docs: List[Document]) -> str:
        """
        Format a list of documents into XML snippets.

        Each document is wrapped with its positional id, its source metadata,
        and its page content, inside a single <sources> element.
        """
        formatted_docs = [
            (
                f"<source id=\"{i}\">\n"
                # .get() guards against documents missing 'source' metadata,
                # which previously raised KeyError mid-formatting.
                f"<source>{doc.metadata.get('source', 'unknown')}</source>\n"
                f"<article_snippet>{doc.page_content}</article_snippet>\n"
                f"</source>"
            )
            for i, doc in enumerate(docs)
        ]
        return f"\n\n<sources>\n{chr(10).join(formatted_docs)}\n</sources>"

    def get_response(self, query: str) -> dict:
        """
        Return the chatbot response for the given query.

        The method retrieves relevant documents and then uses the XML chain to
        generate an answer with citations.

        :param query: The user question.
        :return: Dict with "input", "context" (retrieved Documents) and
            "answer" (the XMLOutputParser result); callers index
            result['answer']['cited_answer'].
        """
        return self.chain.invoke({"input": query})

    def get_extra_resources(self, query: str, original_sources: List[str]):
        """
        Retrieve additional documents for a query, excluding already-cited sources.

        :param query: The query string to run against the vector store.
        :param original_sources: Source names to exclude via the Chroma
            ``$nin`` metadata filter, so only new material is returned.
        :return: List of retrieved Documents not originating from
            ``original_sources``.
        """
        retriever = self.vectordb.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 8, "filter": {"source": {"$nin": original_sources}}},
        )
        return retriever.invoke(query)
Refiner.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_core.output_parsers import JsonOutputParser
|
| 2 |
+
from langchain_core.runnables import RunnableLambda
|
| 3 |
+
from langchain_core.prompts import PromptTemplate
|
| 4 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
| 7 |
+
from langchain_core.vectorstores import InMemoryVectorStore
|
| 8 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 9 |
+
from operator import itemgetter
|
| 10 |
+
from config import NEW_REFINE_SYSTEM_PROMPT_JSON
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Answer(BaseModel):
    """Schema for the refiner's JSON output.

    JsonOutputParser uses this model to build the format instructions injected
    into NEW_REFINE_SYSTEM_PROMPT_JSON and to describe the expected keys.
    """

    # The user's question, rephrased to fit the reference PDF's context.
    enhanced_question: str = Field(description="Paraphrased question")
    # The refined answer; the literal "Nothing" signals no supporting evidence.
    enhanced_answer: str = Field(description="Enhanced answer")
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class RefinementPipeline:
    """Refine a draft answer against the authoritative "refine.pdf" document.

    Construction loads the PDF, splits it into overlapping chunks, embeds the
    chunks into an in-memory vector store, and wires a retrieval -> prompt ->
    LLM -> JSON-parser chain. ``invoke`` runs that chain for one Q/A pair.
    """

    def __init__(self, model: str = "gpt-4o", temperature: float = 0.1):
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.parser = JsonOutputParser(pydantic_object=Answer)
        self.prompt = PromptTemplate(
            template=NEW_REFINE_SYSTEM_PROMPT_JSON,
            input_variables=["question", "answer", "context"],
            partial_variables={"format_instructions": self.parser.get_format_instructions()},
        )

        # Load the authoritative PDF and split it into overlapping chunks.
        self.pdf_loader = PyMuPDFLoader("refine.pdf")
        self.pdf_docs = self.pdf_loader.load()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
        self.splits = self.text_splitter.split_documents(self.pdf_docs)

        # Index the chunks in memory; embeddings are computed once at startup.
        self.pdf_vectorstore = InMemoryVectorStore.from_documents(
            documents=self.splits, embedding=OpenAIEmbeddings()
        )
        self.pdf_retriever = self.pdf_vectorstore.as_retriever()

        # Pipeline: route "question" into retrieval, flatten the retrieved
        # Documents to plain text, then prompt -> LLM -> JSON parser.
        self.chain = (
            {
                "context": itemgetter("question") | self.pdf_retriever,
                "question": itemgetter("question"),
                "answer": itemgetter("answer"),
            }
            | RunnableLambda(self._flatten_context)
            | self.prompt
            | self.llm
            | self.parser
        )

    @staticmethod
    def _flatten_context(inputs):
        """Join retrieved Document objects into one newline-separated string."""
        return {
            "context": "\n".join(doc.page_content for doc in inputs["context"]),
            "question": inputs["question"],
            "answer": inputs["answer"],
        }

    def invoke(self, question: str, answer: str):
        """Run the refinement chain; returns the parser's JSON dict
        (expected keys: "enhanced_question", "enhanced_answer")."""
        return self.chain.invoke({"question": question, "answer": answer})
|
chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/data_level0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e95387811fdd2c605bdbe40ef66c6427e098a831868dd455a3033a5eaf874a3
|
| 3 |
+
size 62140000
|
chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/header.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73a6d751332fa17ddf2061d192fdfd6cdbd8cec7ee44b674a975082dc8b71982
|
| 3 |
+
size 100
|
chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/index_metadata.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24b1ebdaa3495251e55f78467b1c4675e3100547c42ba96603c02b2ba59a1121
|
| 3 |
+
size 288012
|
chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/length.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f2e700c214bf60718eade7cbed207636098e2fe9f13fc04b69c3f9957bd9940
|
| 3 |
+
size 20000
|
chroma_langchain_db/0fadc0a3-4d1d-4f84-8e80-2aaf4d3d5940/link_lists.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5619be57918b92ff190f6a1a193c472a026ed614ea305f95d5e4d1523961fe03
|
| 3 |
+
size 43052
|
chroma_langchain_db/chroma.sqlite3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e399b010878c1e88b70e4dff1babfbe01cff91fcbfea99f77817f3b6e66a9771
|
| 3 |
+
size 126881792
|
config.py
CHANGED
|
@@ -2,10 +2,7 @@ from langchain.chains.summarize.refine_prompts import REFINE_PROMPT
|
|
| 2 |
|
| 3 |
XML_SYSTEM_PROMPT= """You're a helpful AI assistant. Given a user question and some scientific literature
|
| 4 |
documents which highlight research on different roof cover materials (e.g., asphalt shingles, metal, tile)
|
| 5 |
-
and their performance against natural hazards(e.g., wind, hail),
|
| 6 |
-
question.
|
| 7 |
-
You’re a helpful AI assistant. Given a user question and scientific literature on various roof cover materials (e.g., asphalt shingles, metal, tile)
|
| 8 |
-
and their performance against natural hazards (e.g., wind, hail), provide clear, concise, and informed answers without unnecessary fluff.
|
| 9 |
|
| 10 |
When addressing questions about ‘what is the best roof,’ consider the following factors:
|
| 11 |
• Geography, aesthetic preferences, budget, frequency of weather-related hazards, roof cover performance, and how performance changes with age.
|
|
@@ -25,6 +22,29 @@ that justify the answer. Use the following format for your final output:
|
|
| 25 |
</cited_answer>
|
| 26 |
Here are the articles:{context}"""
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
REFINE_SYSTEM_PROMPT = (
|
| 29 |
"You are an assistant for question-answering tasks. "
|
| 30 |
"Use the following pieces of retrieved context to answer "
|
|
@@ -32,4 +52,35 @@ REFINE_SYSTEM_PROMPT = (
|
|
| 32 |
"If you cannot answer the question with the retrieved context, only say that 'Nothing' "
|
| 33 |
"\n\n"
|
| 34 |
"{context}"
|
| 35 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
XML_SYSTEM_PROMPT= """You're a helpful AI assistant. Given a user question and some scientific literature
|
| 4 |
documents which highlight research on different roof cover materials (e.g., asphalt shingles, metal, tile)
|
| 5 |
+
and their performance against natural hazards(e.g., wind, hail), provide clear, concise, and informed answers without unnecessary fluff.
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
When addressing questions about ‘what is the best roof,’ consider the following factors:
|
| 8 |
• Geography, aesthetic preferences, budget, frequency of weather-related hazards, roof cover performance, and how performance changes with age.
|
|
|
|
| 22 |
</cited_answer>
|
| 23 |
Here are the articles:{context}"""
|
| 24 |
|
| 25 |
+
# NEW_REFINE_SYSTEM_PROMPT = (
|
| 26 |
+
# "You are an assistant designed to ensure that answers are completely aligned with the authoritative content of a provided PDF. "
|
| 27 |
+
# "Before retrieving any context, first paraphrase the user's question to fully capture its intended meaning and naturally fit it into the context of the PDF. "
|
| 28 |
+
# "This is crucial because users sometimes provide partially paraphrased or incomplete questions. "
|
| 29 |
+
# "The questions generally pertain to scientific literature on roof cover materials (e.g., asphalt shingles, metal, tile) and their performance against natural hazards (e.g., wind, hail). "
|
| 30 |
+
# "Your task is then to take the refined question and an initial answer from another source, and refine that answer using the context retrieved from the PDF. "
|
| 31 |
+
# "If the retrieved context supports the initial answer, adjust it to fully match the PDF content. If the context contradicts or does not support the answer, modify it accordingly. "
|
| 32 |
+
# "Provide clear, concise, and informed answers without unnecessary fluff. If there is no supporting evidence, simply respond with ONLY one word 'Nothing'. "
|
| 33 |
+
# "IMPORTANT: In your final output, return only the refined answer text with no additional labels, headings, or repeated paraphrased questions."
|
| 34 |
+
# )
|
| 35 |
+
|
| 36 |
+
# NEW_REFINE_SYSTEM_PROMPT = (
|
| 37 |
+
# "You are an assistant designed to ensure that answers are completely aligned with the authoritative content of a provided PDF. "
|
| 38 |
+
# "Before retrieving any context, first paraphrase the user's question to fully capture its intended meaning and naturally fit it into the context of the PDF. "
|
| 39 |
+
# "This is crucial because users sometimes provide partially paraphrased or incomplete questions. "
|
| 40 |
+
# "The questions generally pertain to scientific literature on roof cover materials (e.g., asphalt shingles, metal, tile) and their performance against natural hazards (e.g., wind, hail). "
|
| 41 |
+
# "Your task is then to take the refined (paraphrased) question and an initial answer from another source, and refine that answer using the context retrieved from the PDF. "
|
| 42 |
+
# "If the retrieved context supports the initial answer, adjust it to fully match the PDF content. If the context contradicts or does not support the answer, modify it accordingly. "
|
| 43 |
+
# "Provide clear, concise, and informed answers without unnecessary fluff. If there is no supporting evidence, simply respond with 'Nothing'. "
|
| 44 |
+
# "IMPORTANT: Your final output must be a valid JSON object with exactly two keys: 'paraphrased_question' and 'answer'. "
|
| 45 |
+
# "Respond ONLY with the JSON object. Do not include any other text or formatting."
|
| 46 |
+
# )
|
| 47 |
+
|
| 48 |
REFINE_SYSTEM_PROMPT = (
|
| 49 |
"You are an assistant for question-answering tasks. "
|
| 50 |
"Use the following pieces of retrieved context to answer "
|
|
|
|
| 52 |
"If you cannot answer the question with the retrieved context, only say that 'Nothing' "
|
| 53 |
"\n\n"
|
| 54 |
"{context}"
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# NEW_REFINE_SYSTEM_PROMPT_JSON = (
|
| 58 |
+
# "You are an assistant designed to ensure that answers are completely aligned with the authoritative content of a provided PDF. "
|
| 59 |
+
# "Before retrieving any context, first paraphrase the user's question to fully capture its intended meaning and naturally fit it into the context of the PDF. "
|
| 60 |
+
# "This is crucial because users sometimes provide partially paraphrased or incomplete questions. "
|
| 61 |
+
# "The questions generally pertain to scientific literature on roof cover materials (e.g., asphalt shingles, metal, tile) and their performance against natural hazards (e.g., wind, hail). "
|
| 62 |
+
# "Your task is then to take the refined (paraphrased) question and an initial answer from another source, and refine that answer using the context retrieved from the PDF. "
|
| 63 |
+
# "If the retrieved context supports the initial answer, adjust it to fully match the PDF content. If the context contradicts or does not support the answer, modify it accordingly. "
|
| 64 |
+
# "Provide clear, concise, and informed answers without unnecessary fluff. If there is no supporting evidence, simply respond with 'Nothing'. "
|
| 65 |
+
# "Here is the user question: {question} "
|
| 66 |
+
# "Here is the initial answer: {answer} "
|
| 67 |
+
# "Here is the retrieved context: {context} "
|
| 68 |
+
# "IMPORTANT: Your final output must be a valid JSON object with {format_instructions} "
|
| 69 |
+
# )
|
| 70 |
+
|
| 71 |
+
# Refinement prompt used by RefinementPipeline (PromptTemplate with
# question/answer/context variables; format_instructions is partial-filled
# from JsonOutputParser(pydantic_object=Answer)).
# BUGFIX: the final instruction previously demanded keys "paraphrased_question"
# and "answer", contradicting both the Answer schema's format instructions and
# step 2 above ("Nothing" as "enhanced_answer") — and the consumer reads
# result.get("enhanced_answer"). Keys corrected to match the schema.
NEW_REFINE_SYSTEM_PROMPT_JSON = """You are an assistant that ensures answers are fully aligned with the authoritative content of a provided PDF document. Before retrieving any context, first paraphrase the user's question to capture its complete intended meaning and seamlessly integrate it into the context of the PDF. This is essential because users sometimes submit partially paraphrased or incomplete questions, especially regarding scientific literature on roof cover materials (e.g., asphalt shingles, metal, tile) and their performance against natural hazards (e.g., wind, hail).

Your task is as follows:
1. Paraphrase the provided user question to fully clarify its intent.
2. Using the retrieved context from the PDF, refine the initial answer from another source:
- If the context supports the initial answer, adjust it so that it fully aligns with the PDF content.
- If the context contradicts or does not support the answer, modify the answer accordingly.
- If there is no supporting evidence, respond ONLY with "Nothing" as "enhanced_answer".
3. Provide a clear, concise, and informed answer without unnecessary fluff.

Inputs:
- User question: {question}
- Initial answer: {answer}
- Retrieved context: {context}

IMPORTANT: Your final output must be a valid JSON object with exactly two keys: "enhanced_question" and "enhanced_answer". {format_instructions}"""
|
helpers.py
CHANGED
|
@@ -32,7 +32,8 @@ def get_article_info(df: pd.DataFrame, file_name: str):
|
|
| 32 |
Given a DataFrame and a file name, return the corresponding
|
| 33 |
title and link from the CSV. Assumes file_name is unique in the DF.
|
| 34 |
"""
|
| 35 |
-
|
|
|
|
| 36 |
if row.empty:
|
| 37 |
# Fallback if not found
|
| 38 |
return "IBHS Website", "https://ibhs.org"
|
|
|
|
| 32 |
Given a DataFrame and a file name, return the corresponding
|
| 33 |
title and link from the CSV. Assumes file_name is unique in the DF.
|
| 34 |
"""
|
| 35 |
+
edited_file_name = file_name.replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
|
| 36 |
+
row = df[df["file_name"] == edited_file_name]
|
| 37 |
if row.empty:
|
| 38 |
# Fallback if not found
|
| 39 |
return "IBHS Website", "https://ibhs.org"
|
main.py
CHANGED
|
@@ -1,11 +1,3 @@
|
|
| 1 |
-
from langchain_openai import OpenAIEmbeddings
|
| 2 |
-
from langchain_openai import ChatOpenAI
|
| 3 |
-
from langchain_chroma import Chroma
|
| 4 |
-
import chromadb
|
| 5 |
-
from chromadb.config import Settings
|
| 6 |
-
from langchain_core.prompts import ChatPromptTemplate
|
| 7 |
-
from langchain_core.runnables import RunnablePassthrough
|
| 8 |
-
from langchain_core.output_parsers import XMLOutputParser
|
| 9 |
import gradio as gr
|
| 10 |
import pandas as pd
|
| 11 |
import logging
|
|
@@ -13,24 +5,17 @@ from langchain_core.exceptions import OutputParserException
|
|
| 13 |
import os
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
import azure.cosmos.cosmos_client as cosmos_client
|
| 16 |
-
from langchain_community.document_loaders import PyPDFLoader
|
| 17 |
-
from langchain_core.vectorstores import InMemoryVectorStore
|
| 18 |
-
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 19 |
from langchain.chains import create_retrieval_chain
|
| 20 |
-
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 21 |
-
from langchain_core.runnables import RunnableLambda
|
| 22 |
import datetime
|
| 23 |
import uuid
|
| 24 |
-
from
|
| 25 |
-
from
|
| 26 |
|
|
|
|
| 27 |
|
| 28 |
load_dotenv()
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
K_VALUE = 5
|
| 32 |
-
|
| 33 |
-
xml_prompt = ChatPromptTemplate.from_messages([("system", XML_SYSTEM_PROMPT), ("human", "{input}")])
|
| 34 |
|
| 35 |
ENV = os.getenv('ENV')
|
| 36 |
HOST = os.getenv('ACCOUNT_HOST')
|
|
@@ -42,58 +27,8 @@ client = cosmos_client.CosmosClient(HOST, {'masterKey': MASTER_KEY}, user_agent=
|
|
| 42 |
database = client.get_database_client(DATABASE_ID)
|
| 43 |
container = database.get_container_client(CONTAINER_ID)
|
| 44 |
history_container = database.get_container_client(HISTORY_CONTAINER_ID)
|
| 45 |
-
llm = ChatOpenAI(model="gpt-4o", temperature=0)
|
| 46 |
-
df = pd.read_csv("articles_db.csv")
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
rag_chain_from_docs = (
|
| 50 |
-
RunnablePassthrough.assign(context=(lambda x: format_docs_xml(x["context"])))
|
| 51 |
-
| xml_prompt
|
| 52 |
-
| llm
|
| 53 |
-
| XMLOutputParser()
|
| 54 |
-
)
|
| 55 |
-
|
| 56 |
-
settings = Settings(persist_directory=PERSIST_DIRECTORY)
|
| 57 |
-
vectordb = Chroma(embedding_function=OpenAIEmbeddings(), persist_directory=PERSIST_DIRECTORY)
|
| 58 |
-
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": K_VALUE})
|
| 59 |
-
retrieve_docs = (lambda x: x["input"]) | retriever
|
| 60 |
-
chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
|
| 61 |
-
answer=rag_chain_from_docs
|
| 62 |
-
)
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
pdf_loader = PyPDFLoader("refine.pdf")
|
| 66 |
-
pdf_docs = pdf_loader.load()
|
| 67 |
-
|
| 68 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
|
| 69 |
-
splits = text_splitter.split_documents(pdf_docs)
|
| 70 |
-
pdf_vectorstore = InMemoryVectorStore.from_documents(
|
| 71 |
-
documents=splits, embedding=OpenAIEmbeddings()
|
| 72 |
-
)
|
| 73 |
-
|
| 74 |
-
pdf_retriever = pdf_vectorstore.as_retriever()
|
| 75 |
-
|
| 76 |
-
refine_prompt = ChatPromptTemplate.from_messages(
|
| 77 |
-
[
|
| 78 |
-
("system", REFINE_SYSTEM_PROMPT),
|
| 79 |
-
("human", "{input}"),
|
| 80 |
-
]
|
| 81 |
-
)
|
| 82 |
-
|
| 83 |
-
question_answer_chain = create_stuff_documents_chain(llm, refine_prompt)
|
| 84 |
-
pdf_rag_chain = create_retrieval_chain(pdf_retriever, question_answer_chain)
|
| 85 |
-
|
| 86 |
-
def vectordb_search(query):
|
| 87 |
-
titles, links = [], []
|
| 88 |
-
question_search = retriever.invoke(query)
|
| 89 |
-
for item in question_search:
|
| 90 |
-
edited_item = item.metadata["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
|
| 91 |
-
title, link = get_article_info(df, edited_item)
|
| 92 |
-
if title not in titles:
|
| 93 |
-
titles.append(title)
|
| 94 |
-
links.append(link)
|
| 95 |
-
return "\n".join([f"- [{title}]({link})" for title, link in zip(titles, links)])
|
| 96 |
|
|
|
|
| 97 |
|
| 98 |
def initialize_session(session_id):
|
| 99 |
# If no session_id exists, generate a new one
|
|
@@ -111,47 +46,68 @@ def llm_response(query, session_id):
|
|
| 111 |
chat["partitionKey"] = "RoofingRoadmap"
|
| 112 |
chat["user"] = query
|
| 113 |
chat["env"] = ENV
|
|
|
|
| 114 |
|
| 115 |
-
|
|
|
|
|
|
|
| 116 |
try:
|
| 117 |
-
if 'f wave' in query.lower() or 'f-wave' in query.lower() or 'fwave' in query.lower():
|
| 118 |
-
query = query.replace('f wave', 'f-wave shingle').replace('f-wave', 'f-wave shingle').replace('fwave', 'f-wave shingle')
|
| 119 |
-
result = chain.invoke({"input": query})
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
title, link = get_article_info(df, edited_item)
|
| 131 |
-
if title not in titles:
|
| 132 |
-
titles.append(title)
|
| 133 |
-
# if link not in links:
|
| 134 |
-
links.append(link)
|
| 135 |
-
except (TypeError, KeyError, IndexError):
|
| 136 |
-
# Handle the error or simply pass if citation does not have the expected keys
|
| 137 |
-
continue
|
| 138 |
-
question_search = retriever.invoke(query)
|
| 139 |
-
for res_item in question_search:
|
| 140 |
-
edited_item = res_item.metadata["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
|
| 141 |
-
res_title, res_link = get_article_info(df, edited_item)
|
| 142 |
-
if res_title not in res_titles and res_title not in titles:
|
| 143 |
-
res_titles.append(res_title)
|
| 144 |
-
res_links.append(res_link)
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
except OutputParserException as e:
|
| 148 |
-
if pdf_answer['answer'] == 'Nothing':
|
| 149 |
answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
|
| 150 |
return answer
|
|
|
|
|
|
|
| 151 |
else:
|
| 152 |
-
answer =
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
finally:
|
|
|
|
|
|
|
| 155 |
chat["ai"] = answer
|
| 156 |
chat["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
| 157 |
container.create_item(body=chat)
|
|
@@ -172,8 +128,9 @@ def llm_response(query, session_id):
|
|
| 172 |
# Combine answer and citations for final markdown output
|
| 173 |
|
| 174 |
|
| 175 |
-
if not res_links:
|
| 176 |
-
|
|
|
|
| 177 |
else:
|
| 178 |
markdown_list += f"\n\n\nHere is a list of articles that can provide more information about your inquiry:\n"
|
| 179 |
markdown_list += "\n".join([f"- [{res_title}]({res_link})" for res_title, res_link in zip(res_titles, res_links)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import logging
|
|
|
|
| 5 |
import os
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
import azure.cosmos.cosmos_client as cosmos_client
|
|
|
|
|
|
|
|
|
|
| 8 |
from langchain.chains import create_retrieval_chain
|
|
|
|
|
|
|
| 9 |
import datetime
|
| 10 |
import uuid
|
| 11 |
+
from LiteratureAgent import RoofCoverChatbot
|
| 12 |
+
from Refiner import RefinementPipeline
|
| 13 |
|
| 14 |
+
from helpers import get_article_info
|
| 15 |
|
| 16 |
load_dotenv()
|
| 17 |
+
refiner = RefinementPipeline()
|
| 18 |
+
literature_agent = RoofCoverChatbot()
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
ENV = os.getenv('ENV')
|
| 21 |
HOST = os.getenv('ACCOUNT_HOST')
|
|
|
|
| 27 |
database = client.get_database_client(DATABASE_ID)
|
| 28 |
container = database.get_container_client(CONTAINER_ID)
|
| 29 |
history_container = database.get_container_client(HISTORY_CONTAINER_ID)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
df = pd.read_csv("articles_db.csv")
|
| 32 |
|
| 33 |
def initialize_session(session_id):
|
| 34 |
# If no session_id exists, generate a new one
|
|
|
|
| 46 |
chat["partitionKey"] = "RoofingRoadmap"
|
| 47 |
chat["user"] = query
|
| 48 |
chat["env"] = ENV
|
| 49 |
+
answer = None
|
| 50 |
|
| 51 |
+
if 'f wave' in query.lower() or 'f-wave' in query.lower() or 'fwave' in query.lower():
|
| 52 |
+
query = query.replace('f wave', 'f-wave shingle').replace('f-wave', 'f-wave shingle').replace('fwave',
|
| 53 |
+
'f-wave shingle')
|
| 54 |
try:
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
response = literature_agent.get_response(query)
|
| 57 |
+
enhanced_query = refiner.invoke(question=query, answer=response)
|
| 58 |
+
|
| 59 |
+
try:
|
| 60 |
+
initial_answer = response['answer']['cited_answer'][0].get("answer", "Nothing")
|
| 61 |
+
except Exception as e:
|
| 62 |
+
initial_answer = "Nothing"
|
| 63 |
+
|
| 64 |
+
if enhanced_query.get("enhanced_answer") == "Nothing" and initial_answer == "Nothing":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
|
| 66 |
return answer
|
| 67 |
+
if enhanced_query.get("enhanced_answer") != "Nothing":
|
| 68 |
+
answer = enhanced_query['enhanced_answer']
|
| 69 |
else:
|
| 70 |
+
answer = response
|
| 71 |
+
|
| 72 |
+
citations = response['answer']['cited_answer'][1].get('citations', [])
|
| 73 |
+
|
| 74 |
+
original_citations = []
|
| 75 |
+
if citations:
|
| 76 |
+
for citation in citations:
|
| 77 |
+
try:
|
| 78 |
+
# edited_item = citation['citation'][1]["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
|
| 79 |
+
original_citations.append(citation['citation'][1]["source"])
|
| 80 |
+
title, link = get_article_info(df, citation['citation'][1]["source"])
|
| 81 |
+
if title not in titles:
|
| 82 |
+
titles.append(title)
|
| 83 |
+
# if link not in links:
|
| 84 |
+
links.append(link)
|
| 85 |
+
except Exception as e:
|
| 86 |
+
continue
|
| 87 |
+
|
| 88 |
+
try:
|
| 89 |
+
question_search = literature_agent.get_extra_resources(query, original_citations)
|
| 90 |
+
except Exception as e:
|
| 91 |
+
question_search = []
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
if question_search:
|
| 95 |
+
for res_item in question_search:
|
| 96 |
+
|
| 97 |
+
res_title, res_link = get_article_info(df, res_item.metadata["source"])
|
| 98 |
+
if res_title not in res_titles and res_title not in titles:
|
| 99 |
+
res_titles.append(res_title)
|
| 100 |
+
res_links.append(res_link)
|
| 101 |
+
if len(res_titles) == 5:
|
| 102 |
+
break
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
except Exception as e:
|
| 106 |
+
answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
|
| 107 |
+
return answer
|
| 108 |
finally:
|
| 109 |
+
if answer is None:
|
| 110 |
+
answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
|
| 111 |
chat["ai"] = answer
|
| 112 |
chat["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
| 113 |
container.create_item(body=chat)
|
|
|
|
| 128 |
# Combine answer and citations for final markdown output
|
| 129 |
|
| 130 |
|
| 131 |
+
if not res_links and not links:
|
| 132 |
+
markdown_list += f"\n\n\nHere is a list of articles that can provide more information about your inquiry:\n"
|
| 133 |
+
markdown_list += "\n".join(["- [IBHS Website](https://ibhs.org)", "- [FORTIFIED Website](https://fortifiedhome.org/roof/)" ])
|
| 134 |
else:
|
| 135 |
markdown_list += f"\n\n\nHere is a list of articles that can provide more information about your inquiry:\n"
|
| 136 |
markdown_list += "\n".join([f"- [{res_title}]({res_link})" for res_title, res_link in zip(res_titles, res_links)])
|
refine.pdf
CHANGED
|
Binary files a/refine.pdf and b/refine.pdf differ
|
|
|
requirements.txt
CHANGED
|
@@ -22,4 +22,5 @@ docx2txt
|
|
| 22 |
azure-mgmt-storage
|
| 23 |
azure-identity
|
| 24 |
azure-storage-blob
|
| 25 |
-
azure-cosmos
|
|
|
|
|
|
| 22 |
azure-mgmt-storage
|
| 23 |
azure-identity
|
| 24 |
azure-storage-blob
|
| 25 |
+
azure-cosmos
|
| 26 |
+
fitz
|