IBHS committed on
Commit
f4fa293
·
verified ·
1 Parent(s): cb07d17

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +319 -319
main.py CHANGED
@@ -1,320 +1,320 @@
1
- from langchain_openai import OpenAIEmbeddings
2
- from langchain_openai import ChatOpenAI
3
- from langchain_chroma import Chroma
4
- import chromadb
5
- from chromadb.config import Settings
6
- from langchain_core.prompts import ChatPromptTemplate
7
- from typing import List
8
-
9
- from langchain_core.documents import Document
10
- from langchain_core.runnables import RunnablePassthrough
11
- from langchain_core.output_parsers import XMLOutputParser
12
- import gradio as gr
13
- import pandas as pd
14
- import logging
15
- from langchain_core.exceptions import OutputParserException
16
- import os
17
- from dotenv import load_dotenv
18
-
19
- from sympy.codegen.ast import continue_
20
-
21
- import azure.cosmos.cosmos_client as cosmos_client
22
- import azure.cosmos.exceptions as exceptions
23
- from azure.cosmos.partition_key import PartitionKey
24
-
25
- from langchain_community.document_loaders import PyPDFLoader
26
- from langchain_core.vectorstores import InMemoryVectorStore
27
- from langchain_text_splitters import RecursiveCharacterTextSplitter
28
- from langchain.chains import create_retrieval_chain
29
- from langchain.chains.combine_documents import create_stuff_documents_chain
30
- from langchain_core.runnables import RunnableLambda
31
- import datetime
32
- import uuid
33
- import hashlib
34
- load_dotenv()
35
- # Constants
36
- PERSIST_DIRECTORY = "chroma_store"
37
- K_VALUE = 5
38
-
39
- xml_system = """You're a helpful AI assistant. Given a user question and some scientific literature
40
- documents which highlight research on different roof cover materials (e.g., asphalt shingles, metal, tile)
41
- and their performance against natural hazards(e.g., wind, hail), answer the user
42
- question.
43
- You’re a helpful AI assistant. Given a user question and scientific literature on various roof cover materials (e.g., asphalt shingles, metal, tile)
44
- and their performance against natural hazards (e.g., wind, hail), provide clear, concise, and informed answers without unnecessary fluff.
45
-
46
- When addressing questions about ‘what is the best roof,’ consider the following factors:
47
- • Geography, aesthetic preferences, budget, frequency of weather-related hazards, roof cover performance, and how performance changes with age.
48
- • For the insurance industry, the ‘best roof’ depends on the specific hazards (their location and frequency), performance expectations and predictability, and the cost of materials.
49
-
50
- If none of the articles answer the question, simply say that there are no articles relevant to your inquiry.
51
- Remember, you must return both an answer and citations. A citation consists of a VERBATIM quote that
52
- justifies the answer and the ID and also Source Name of the quote article. Return a citation for every quote across all articles
53
- that justify the answer. Use the following format for your final output:
54
- <cited_answer>
55
- <answer></answer>
56
- <citations>
57
- <citation><source_id></source_id><source></source><quote></quote></citation>
58
- <citation><source_id></source_id><source></source><quote></quote></citation>
59
- ...
60
- </citations>
61
- </cited_answer>
62
- Here are the articles:{context}"""
63
-
64
- xml_prompt = ChatPromptTemplate.from_messages([("system", xml_system), ("human", "{input}")])
65
-
66
- ENV = os.getenv('ENV')
67
- HOST = os.getenv('ACCOUNT_HOST')
68
- MASTER_KEY = os.getenv('ACCOUNT_KEY')
69
- DATABASE_ID = os.getenv('COSMOS_DATABASE')
70
- CONTAINER_ID = os.getenv('COSMOS_CONTAINER')
71
- HISTORY_CONTAINER_ID = os.getenv('COSMOS_HISTORY_CONTAINER')
72
- client = cosmos_client.CosmosClient(HOST, {'masterKey': MASTER_KEY}, user_agent="CosmosDBPythonQuickstart", user_agent_overwrite=True)
73
- database = client.get_database_client(DATABASE_ID)
74
- container = database.get_container_client(CONTAINER_ID)
75
- history_container = database.get_container_client(HISTORY_CONTAINER_ID)
76
-
77
-
78
- def format_docs_xml(docs: List[Document]) -> str:
79
- formatted_docs = [
80
- f"<source id=\"{i}\">\n<source>{doc.metadata['source']}</source>\n<article_snippet>{doc.page_content}</article_snippet>\n</source>"
81
- for i, doc in enumerate(docs)
82
- ]
83
- return f"\n\n<sources>\n{chr(10).join(formatted_docs)}\n</sources>"
84
-
85
- def parse_output_with_error_handling(output):
86
- try:
87
- return XMLOutputParser()
88
- except Exception:
89
- # return {'answer':{'cited_answer': [{'answer': ''},{'citations': []}]}}
90
- return XMLOutputParser().parse("")
91
-
92
-
93
- llm = ChatOpenAI(model="gpt-4o", temperature=0)
94
-
95
-
96
- rag_chain_from_docs = (
97
- RunnablePassthrough.assign(context=(lambda x: format_docs_xml(x["context"])))
98
- | xml_prompt
99
- | llm
100
- | XMLOutputParser()
101
- )
102
-
103
- settings = Settings(persist_directory=PERSIST_DIRECTORY)
104
- vectordb = Chroma(embedding_function=OpenAIEmbeddings(), persist_directory=PERSIST_DIRECTORY)
105
- retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": K_VALUE})
106
- retrieve_docs = (lambda x: x["input"]) | retriever
107
- chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
108
- answer=rag_chain_from_docs
109
- )
110
-
111
-
112
- pdf_loader = PyPDFLoader("refine.pdf")
113
- pdf_docs = pdf_loader.load()
114
-
115
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
116
- splits = text_splitter.split_documents(pdf_docs)
117
- pdf_vectorstore = InMemoryVectorStore.from_documents(
118
- documents=splits, embedding=OpenAIEmbeddings()
119
- )
120
-
121
- pdf_retriever = pdf_vectorstore.as_retriever()
122
-
123
- refine_system_prompt = (
124
- "You are an assistant for question-answering tasks. "
125
- "Use the following pieces of retrieved context to answer "
126
- "the question. provide clear, concise, and informed answers without unnecessary fluff. "
127
- "If you cannot answer the question with the retrieved context, only say that 'Nothing' "
128
- "\n\n"
129
- "{context}"
130
- )
131
-
132
- refine_prompt = ChatPromptTemplate.from_messages(
133
- [
134
- ("system", refine_system_prompt),
135
- ("human", "{input}"),
136
- ]
137
- )
138
-
139
- question_answer_chain = create_stuff_documents_chain(llm, refine_prompt)
140
- pdf_rag_chain = create_retrieval_chain(pdf_retriever, question_answer_chain)
141
-
142
- def get_article_info(df, file_name):
143
- title = df[df["file_name"] == file_name]["title"].iloc[0]
144
- link = df[df["file_name"] == file_name]["link"].iloc[0]
145
- return title, link
146
-
147
-
148
- df = pd.read_csv("articles_db.csv")
149
-
150
-
151
- def vectordb_search(query):
152
- titles, links = [], []
153
- question_search = retriever.invoke(query)
154
- for item in question_search:
155
- edited_item = item.metadata["source"].replace("Articles/", "")
156
- title, link = get_article_info(df, edited_item)
157
- if title not in titles:
158
- titles.append(title)
159
- # if link not in links:
160
- links.append(link)
161
- return "\n".join([f"- [{title}]({link})" for title, link in zip(titles, links)])
162
-
163
- RESPONSE_ID = ""
164
-
165
-
166
- def llm_response(query):
167
- global RESPONSE_ID
168
- RESPONSE_ID = str(uuid.uuid4())
169
- chat = {}
170
- titles, links, res_titles, res_links = [], [], [], []
171
- chat["id"] = RESPONSE_ID
172
- chat["partitionKey"] = "RoofingRoadmap"
173
- chat["user"] = query
174
- chat["env"] = ENV
175
-
176
- pdf_answer = pdf_rag_chain.invoke({"input": f"{query}"})
177
- try:
178
- if 'f wave' in query.lower() or 'f-wave' in query.lower() or 'fwave' in query.lower():
179
- query = query.replace('f wave', 'f-wave shingle').replace('f-wave', 'f-wave shingle').replace('fwave', 'f-wave shingle')
180
- result = chain.invoke({"input": query})
181
- if pdf_answer['answer'] == 'Nothing':
182
- answer = result['answer']['cited_answer'][0]["answer"]
183
- citations = result['answer']['cited_answer'][1]['citations']
184
- for citation in citations:
185
- try:
186
- edited_item = citation['citation'][1]["source"].replace("Articles/", "")
187
- title, link = get_article_info(df, edited_item)
188
- if title not in titles:
189
- titles.append(title)
190
- # if link not in links:
191
- links.append(link)
192
- except (TypeError, KeyError, IndexError):
193
- # Handle the error or simply pass if citation does not have the expected keys
194
- continue
195
- else:
196
- answer = pdf_answer['answer']
197
-
198
- if not result['answer']['cited_answer'][1]['citations']:
199
- answer_with_citations = f"{answer}"
200
- else:
201
- question_search = retriever.invoke(query)
202
- for res_item in question_search:
203
- edited_item = res_item.metadata["source"].replace("Articles/", "")
204
- res_title, res_link = get_article_info(df, edited_item)
205
- if res_title not in res_titles and res_title not in titles:
206
- res_titles.append(res_title)
207
- # if res_link not in res_links and res_link not in links:
208
- res_links.append(res_link)
209
-
210
-
211
- except OutputParserException as e:
212
- if pdf_answer['answer'] == 'Nothing':
213
- answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
214
- return answer
215
- else:
216
- answer = pdf_answer['answer']
217
-
218
- finally:
219
- chat["ai"] = answer
220
- chat["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
221
- container.create_item(body=chat)
222
-
223
- # Build the answer with superscript citations
224
- answer_with_citations = f"{answer}"
225
- for i, (title, link) in enumerate(zip(titles, links), start=1):
226
- answer_with_citations += f" <sup>[[{i}]({link})]</sup> "
227
-
228
- # Build the references section with clickable links
229
- if not links:
230
- markdown_list = f"{answer_with_citations}"
231
- else:
232
- citations_section = "\n\nCitations:\n" + "\n".join(
233
- [f"[{i}]: [{title}]({link})" for i, (title, link) in enumerate(zip(titles, links), start=1)]
234
- )
235
- markdown_list = f"{answer_with_citations}{citations_section}"
236
- # Combine answer and citations for final markdown output
237
-
238
-
239
- if not res_links:
240
- return markdown_list
241
- else:
242
- markdown_list += f"\n\n\nHere is a list of articles that can provide more information about your inquiry:\n"
243
- markdown_list += "\n".join([f"- [{res_title}]({res_link})" for res_title, res_link in zip(res_titles, res_links)])
244
-
245
- return markdown_list
246
-
247
- def vote(value, data: gr.LikeData):
248
- chat_vote = {}
249
- chat_vote["id"] = str(uuid.uuid4())
250
- chat_vote["chat_id"] = RESPONSE_ID
251
- chat_vote["partitionKey"] = "RoofingRoadmapVotes"
252
- chat_vote["response"] = data.value[0].split('<sup>', 1)[0].split('\n', 1)[0]
253
- chat_vote["env"] = ENV
254
- chat_vote["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
255
-
256
- if data.liked:
257
- chat_vote["vote"] = "upvote"
258
- else:
259
- chat_vote["vote"] = "downvote"
260
-
261
- history_container.create_item(body=chat_vote)
262
-
263
- def show_feedback_column(visible):
264
- if visible:
265
- # If visible, hide the column
266
- return gr.update(visible=False), gr.update(value=""), False
267
- else:
268
- # If not visible, show the column and clear the Textbox
269
- return gr.update(visible=True), "", True
270
-
271
- def user_feedback(value, ):
272
- chat_feedback = {}
273
- chat_feedback["id"] = str(uuid.uuid4())
274
- chat_feedback["chat_id"] = RESPONSE_ID
275
- chat_feedback["partitionKey"] = "RoofingRoadmapFeedback"
276
- chat_feedback["feedback"] = value
277
- chat_feedback["env"] = ENV
278
- chat_feedback["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
279
- history_container.create_item(body=chat_feedback)
280
- return gr.update(visible=False), ""
281
-
282
- with gr.Blocks() as demo:
283
- gr.Markdown("## Find literature to answer your question!")
284
- gr.Markdown("### Ask a question about the wind and hail performance of asphalt shingle, metal, and tile roofs.")
285
- with gr.Row():
286
- with gr.Column():
287
- chatbot = gr.Chatbot(type="messages", height=400)
288
- chatbot.like(vote, chatbot, None)
289
- msg = gr.Textbox(label="Hit the Enter to send your question", placeholder="What's on your mind?", show_copy_button=True)
290
- with gr.Row():
291
- send = gr.Button("Send", variant="primary", scale=3)
292
- feedback = gr.Button("Feedback", variant="stop", scale=1)
293
- with gr.Column(visible=False, elem_id="feedback_column") as feedback_column:
294
- usr_msg = gr.Textbox(label="Submit feedback to IBHS", info="What went wrong?", placeholder="Give us as much detail as you can!", lines=3)
295
- usr_submit = gr.Button("Submit", variant="secondary")
296
-
297
-
298
-
299
- def user(user_message, history: list):
300
- return "", history + [{"role": "user", "content": user_message}]
301
-
302
-
303
- def bot(history: list):
304
- bot_message = llm_response(history[-1]['content'])
305
- history.append({"role": "assistant", "content": ""})
306
- for character in bot_message:
307
- history[-1]['content'] += character
308
- yield history
309
-
310
-
311
- feedback_column_state = gr.State(False)
312
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
313
- send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
314
- feedback.click(fn=show_feedback_column, inputs=[feedback_column_state], outputs=[feedback_column, usr_msg, feedback_column_state])
315
- usr_submit.click(user_feedback, usr_msg, outputs=[feedback_column, usr_msg])
316
- gr.Markdown("*Our chatbot is constantly learning and improving to better serve you!*")
317
- gr.Markdown("#### Additional questions? Contact IBHS Membership Manager Larry Scott at [lscott@ibhs.org]().")
318
-
319
- if __name__ == "__main__":
320
  demo.launch()
 
1
+ from langchain_openai import OpenAIEmbeddings
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_chroma import Chroma
4
+ import chromadb
5
+ from chromadb.config import Settings
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from typing import List
8
+
9
+ from langchain_core.documents import Document
10
+ from langchain_core.runnables import RunnablePassthrough
11
+ from langchain_core.output_parsers import XMLOutputParser
12
+ import gradio as gr
13
+ import pandas as pd
14
+ import logging
15
+ from langchain_core.exceptions import OutputParserException
16
+ import os
17
+ from dotenv import load_dotenv
18
+
19
+ from sympy.codegen.ast import continue_
20
+
21
+ import azure.cosmos.cosmos_client as cosmos_client
22
+ import azure.cosmos.exceptions as exceptions
23
+ from azure.cosmos.partition_key import PartitionKey
24
+
25
+ from langchain_community.document_loaders import PyPDFLoader
26
+ from langchain_core.vectorstores import InMemoryVectorStore
27
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
28
+ from langchain.chains import create_retrieval_chain
29
+ from langchain.chains.combine_documents import create_stuff_documents_chain
30
+ from langchain_core.runnables import RunnableLambda
31
+ import datetime
32
+ import uuid
33
+ import hashlib
34
+ load_dotenv()
35
+ # Constants
36
+ PERSIST_DIRECTORY = "chroma_store"
37
+ K_VALUE = 5
38
+
39
+ xml_system = """You're a helpful AI assistant. Given a user question and some scientific literature
40
+ documents which highlight research on different roof cover materials (e.g., asphalt shingles, metal, tile)
41
+ and their performance against natural hazards(e.g., wind, hail), answer the user
42
+ question.
43
+ You’re a helpful AI assistant. Given a user question and scientific literature on various roof cover materials (e.g., asphalt shingles, metal, tile)
44
+ and their performance against natural hazards (e.g., wind, hail), provide clear, concise, and informed answers without unnecessary fluff.
45
+
46
+ When addressing questions about ‘what is the best roof,’ consider the following factors:
47
+ • Geography, aesthetic preferences, budget, frequency of weather-related hazards, roof cover performance, and how performance changes with age.
48
+ • For the insurance industry, the ‘best roof’ depends on the specific hazards (their location and frequency), performance expectations and predictability, and the cost of materials.
49
+
50
+ If none of the articles answer the question, simply say that there are no articles relevant to your inquiry.
51
+ Remember, you must return both an answer and citations. A citation consists of a VERBATIM quote that
52
+ justifies the answer and the ID and also Source Name of the quote article. Return a citation for every quote across all articles
53
+ that justify the answer. Use the following format for your final output:
54
+ <cited_answer>
55
+ <answer></answer>
56
+ <citations>
57
+ <citation><source_id></source_id><source></source><quote></quote></citation>
58
+ <citation><source_id></source_id><source></source><quote></quote></citation>
59
+ ...
60
+ </citations>
61
+ </cited_answer>
62
+ Here are the articles:{context}"""
63
+
64
+ xml_prompt = ChatPromptTemplate.from_messages([("system", xml_system), ("human", "{input}")])
65
+
66
+ ENV = os.getenv('ENV')
67
+ HOST = os.getenv('ACCOUNT_HOST')
68
+ MASTER_KEY = os.getenv('ACCOUNT_KEY')
69
+ DATABASE_ID = os.getenv('COSMOS_DATABASE')
70
+ CONTAINER_ID = os.getenv('COSMOS_CONTAINER')
71
+ HISTORY_CONTAINER_ID = os.getenv('COSMOS_HISTORY_CONTAINER')
72
+ client = cosmos_client.CosmosClient(HOST, {'masterKey': MASTER_KEY}, user_agent="CosmosDBPythonQuickstart", user_agent_overwrite=True)
73
+ database = client.get_database_client(DATABASE_ID)
74
+ container = database.get_container_client(CONTAINER_ID)
75
+ history_container = database.get_container_client(HISTORY_CONTAINER_ID)
76
+
77
+
78
+ def format_docs_xml(docs: List[Document]) -> str:
79
+ formatted_docs = [
80
+ f"<source id=\"{i}\">\n<source>{doc.metadata['source']}</source>\n<article_snippet>{doc.page_content}</article_snippet>\n</source>"
81
+ for i, doc in enumerate(docs)
82
+ ]
83
+ return f"\n\n<sources>\n{chr(10).join(formatted_docs)}\n</sources>"
84
+
85
+ def parse_output_with_error_handling(output):
86
+ try:
87
+ return XMLOutputParser()
88
+ except Exception:
89
+ # return {'answer':{'cited_answer': [{'answer': ''},{'citations': []}]}}
90
+ return XMLOutputParser().parse("")
91
+
92
+
93
+ llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=os.getenv("OPEN_API_KEY"))
94
+
95
+
96
+ rag_chain_from_docs = (
97
+ RunnablePassthrough.assign(context=(lambda x: format_docs_xml(x["context"])))
98
+ | xml_prompt
99
+ | llm
100
+ | XMLOutputParser()
101
+ )
102
+
103
+ settings = Settings(persist_directory=PERSIST_DIRECTORY)
104
+ vectordb = Chroma(embedding_function=OpenAIEmbeddings(api_key=os.getenv("OPEN_API_KEY")), persist_directory=PERSIST_DIRECTORY)
105
+ retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": K_VALUE})
106
+ retrieve_docs = (lambda x: x["input"]) | retriever
107
+ chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
108
+ answer=rag_chain_from_docs
109
+ )
110
+
111
+
112
+ pdf_loader = PyPDFLoader("refine.pdf")
113
+ pdf_docs = pdf_loader.load()
114
+
115
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
116
+ splits = text_splitter.split_documents(pdf_docs)
117
+ pdf_vectorstore = InMemoryVectorStore.from_documents(
118
+ documents=splits, embedding=OpenAIEmbeddings(api_key=os.getenv("OPEN_API_KEY"))
119
+ )
120
+
121
+ pdf_retriever = pdf_vectorstore.as_retriever()
122
+
123
+ refine_system_prompt = (
124
+ "You are an assistant for question-answering tasks. "
125
+ "Use the following pieces of retrieved context to answer "
126
+ "the question. provide clear, concise, and informed answers without unnecessary fluff. "
127
+ "If you cannot answer the question with the retrieved context, only say that 'Nothing' "
128
+ "\n\n"
129
+ "{context}"
130
+ )
131
+
132
+ refine_prompt = ChatPromptTemplate.from_messages(
133
+ [
134
+ ("system", refine_system_prompt),
135
+ ("human", "{input}"),
136
+ ]
137
+ )
138
+
139
+ question_answer_chain = create_stuff_documents_chain(llm, refine_prompt)
140
+ pdf_rag_chain = create_retrieval_chain(pdf_retriever, question_answer_chain)
141
+
142
+ def get_article_info(df, file_name):
143
+ title = df[df["file_name"] == file_name]["title"].iloc[0]
144
+ link = df[df["file_name"] == file_name]["link"].iloc[0]
145
+ return title, link
146
+
147
+
148
+ df = pd.read_csv("articles_db.csv")
149
+
150
+
151
+ def vectordb_search(query):
152
+ titles, links = [], []
153
+ question_search = retriever.invoke(query)
154
+ for item in question_search:
155
+ edited_item = item.metadata["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
156
+ title, link = get_article_info(df, edited_item)
157
+ if title not in titles:
158
+ titles.append(title)
159
+ # if link not in links:
160
+ links.append(link)
161
+ return "\n".join([f"- [{title}]({link})" for title, link in zip(titles, links)])
162
+
163
+ RESPONSE_ID = ""
164
+
165
+
166
+ def llm_response(query):
167
+ global RESPONSE_ID
168
+ RESPONSE_ID = str(uuid.uuid4())
169
+ chat = {}
170
+ titles, links, res_titles, res_links = [], [], [], []
171
+ chat["id"] = RESPONSE_ID
172
+ chat["partitionKey"] = "RoofingRoadmap"
173
+ chat["user"] = query
174
+ chat["env"] = ENV
175
+
176
+ pdf_answer = pdf_rag_chain.invoke({"input": f"{query}"})
177
+ try:
178
+ if 'f wave' in query.lower() or 'f-wave' in query.lower() or 'fwave' in query.lower():
179
+ query = query.replace('f wave', 'f-wave shingle').replace('f-wave', 'f-wave shingle').replace('fwave', 'f-wave shingle')
180
+ result = chain.invoke({"input": query})
181
+ if pdf_answer['answer'] == 'Nothing':
182
+ answer = result['answer']['cited_answer'][0]["answer"]
183
+ citations = result['answer']['cited_answer'][1]['citations']
184
+ for citation in citations:
185
+ try:
186
+ edited_item = citation['citation'][1]["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
187
+ title, link = get_article_info(df, edited_item)
188
+ if title not in titles:
189
+ titles.append(title)
190
+ # if link not in links:
191
+ links.append(link)
192
+ except (TypeError, KeyError, IndexError):
193
+ # Handle the error or simply pass if citation does not have the expected keys
194
+ continue
195
+ else:
196
+ answer = pdf_answer['answer']
197
+
198
+ if not result['answer']['cited_answer'][1]['citations']:
199
+ answer_with_citations = f"{answer}"
200
+ else:
201
+ question_search = retriever.invoke(query)
202
+ for res_item in question_search:
203
+ edited_item = res_item.metadata["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
204
+ res_title, res_link = get_article_info(df, edited_item)
205
+ if res_title not in res_titles and res_title not in titles:
206
+ res_titles.append(res_title)
207
+ # if res_link not in res_links and res_link not in links:
208
+ res_links.append(res_link)
209
+
210
+
211
+ except OutputParserException as e:
212
+ if pdf_answer['answer'] == 'Nothing':
213
+ answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
214
+ return answer
215
+ else:
216
+ answer = pdf_answer['answer']
217
+
218
+ finally:
219
+ chat["ai"] = answer
220
+ chat["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
221
+ container.create_item(body=chat)
222
+
223
+ # Build the answer with superscript citations
224
+ answer_with_citations = f"{answer}"
225
+ for i, (title, link) in enumerate(zip(titles, links), start=1):
226
+ answer_with_citations += f" <sup>[[{i}]({link})]</sup> "
227
+
228
+ # Build the references section with clickable links
229
+ if not links:
230
+ markdown_list = f"{answer_with_citations}"
231
+ else:
232
+ citations_section = "\n\nCitations:\n" + "\n".join(
233
+ [f"[{i}]: [{title}]({link})" for i, (title, link) in enumerate(zip(titles, links), start=1)]
234
+ )
235
+ markdown_list = f"{answer_with_citations}{citations_section}"
236
+ # Combine answer and citations for final markdown output
237
+
238
+
239
+ if not res_links:
240
+ return markdown_list
241
+ else:
242
+ markdown_list += f"\n\n\nHere is a list of articles that can provide more information about your inquiry:\n"
243
+ markdown_list += "\n".join([f"- [{res_title}]({res_link})" for res_title, res_link in zip(res_titles, res_links)])
244
+
245
+ return markdown_list
246
+
247
+ def vote(value, data: gr.LikeData):
248
+ chat_vote = {}
249
+ chat_vote["id"] = str(uuid.uuid4())
250
+ chat_vote["chat_id"] = RESPONSE_ID
251
+ chat_vote["partitionKey"] = "RoofingRoadmapVotes"
252
+ chat_vote["response"] = data.value[0].split('<sup>', 1)[0].split('\n', 1)[0]
253
+ chat_vote["env"] = ENV
254
+ chat_vote["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
255
+
256
+ if data.liked:
257
+ chat_vote["vote"] = "upvote"
258
+ else:
259
+ chat_vote["vote"] = "downvote"
260
+
261
+ history_container.create_item(body=chat_vote)
262
+
263
+ def show_feedback_column(visible):
264
+ if visible:
265
+ # If visible, hide the column
266
+ return gr.update(visible=False), gr.update(value=""), False
267
+ else:
268
+ # If not visible, show the column and clear the Textbox
269
+ return gr.update(visible=True), "", True
270
+
271
+ def user_feedback(value, ):
272
+ chat_feedback = {}
273
+ chat_feedback["id"] = str(uuid.uuid4())
274
+ chat_feedback["chat_id"] = RESPONSE_ID
275
+ chat_feedback["partitionKey"] = "RoofingRoadmapFeedback"
276
+ chat_feedback["feedback"] = value
277
+ chat_feedback["env"] = ENV
278
+ chat_feedback["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
279
+ history_container.create_item(body=chat_feedback)
280
+ return gr.update(visible=False), ""
281
+
282
+ with gr.Blocks() as demo:
283
+ gr.Markdown("## Find literature to answer your question!")
284
+ gr.Markdown("### Ask a question about the wind and hail performance of asphalt shingle, metal, and tile roofs.")
285
+ with gr.Row():
286
+ with gr.Column():
287
+ chatbot = gr.Chatbot(type="messages", height=400)
288
+ chatbot.like(vote, chatbot, None)
289
+ msg = gr.Textbox(label="Hit the Enter to send your question", placeholder="What's on your mind?", show_copy_button=True)
290
+ with gr.Row():
291
+ send = gr.Button("Send", variant="secondary", scale=3)
292
+ feedback = gr.Button("Feedback", variant="stop", scale=1)
293
+ with gr.Column(visible=False, elem_id="feedback_column") as feedback_column:
294
+ usr_msg = gr.Textbox(label="Submit feedback to IBHS", info="What went wrong?", placeholder="Give us as much detail as you can!", lines=3)
295
+ usr_submit = gr.Button("Submit", variant="secondary")
296
+
297
+
298
+
299
+ def user(user_message, history: list):
300
+ return "", history + [{"role": "user", "content": user_message}]
301
+
302
+
303
+ def bot(history: list):
304
+ bot_message = llm_response(history[-1]['content'])
305
+ history.append({"role": "assistant", "content": ""})
306
+ for character in bot_message:
307
+ history[-1]['content'] += character
308
+ yield history
309
+
310
+
311
+ feedback_column_state = gr.State(False)
312
+ msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
313
+ send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
314
+ feedback.click(fn=show_feedback_column, inputs=[feedback_column_state], outputs=[feedback_column, usr_msg, feedback_column_state])
315
+ usr_submit.click(user_feedback, usr_msg, outputs=[feedback_column, usr_msg])
316
+ gr.Markdown("*Our chatbot is constantly learning and improving to better serve you!*")
317
+ gr.Markdown("#### Additional questions? Contact IBHS Membership Manager Larry Scott at [lscott@ibhs.org]().")
318
+
319
+ if __name__ == "__main__":
320
  demo.launch()