IBHS committed on
Commit
475ef85
·
verified ·
1 Parent(s): f4fa293

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +334 -319
main.py CHANGED
@@ -1,320 +1,335 @@
1
- from langchain_openai import OpenAIEmbeddings
2
- from langchain_openai import ChatOpenAI
3
- from langchain_chroma import Chroma
4
- import chromadb
5
- from chromadb.config import Settings
6
- from langchain_core.prompts import ChatPromptTemplate
7
- from typing import List
8
-
9
- from langchain_core.documents import Document
10
- from langchain_core.runnables import RunnablePassthrough
11
- from langchain_core.output_parsers import XMLOutputParser
12
- import gradio as gr
13
- import pandas as pd
14
- import logging
15
- from langchain_core.exceptions import OutputParserException
16
- import os
17
- from dotenv import load_dotenv
18
-
19
- from sympy.codegen.ast import continue_
20
-
21
- import azure.cosmos.cosmos_client as cosmos_client
22
- import azure.cosmos.exceptions as exceptions
23
- from azure.cosmos.partition_key import PartitionKey
24
-
25
- from langchain_community.document_loaders import PyPDFLoader
26
- from langchain_core.vectorstores import InMemoryVectorStore
27
- from langchain_text_splitters import RecursiveCharacterTextSplitter
28
- from langchain.chains import create_retrieval_chain
29
- from langchain.chains.combine_documents import create_stuff_documents_chain
30
- from langchain_core.runnables import RunnableLambda
31
- import datetime
32
- import uuid
33
- import hashlib
34
- load_dotenv()
35
- # Constants
36
- PERSIST_DIRECTORY = "chroma_store"
37
- K_VALUE = 5
38
-
39
- xml_system = """You're a helpful AI assistant. Given a user question and some scientific literature
40
- documents which highlight research on different roof cover materials (e.g., asphalt shingles, metal, tile)
41
- and their performance against natural hazards(e.g., wind, hail), answer the user
42
- question.
43
- You’re a helpful AI assistant. Given a user question and scientific literature on various roof cover materials (e.g., asphalt shingles, metal, tile)
44
- and their performance against natural hazards (e.g., wind, hail), provide clear, concise, and informed answers without unnecessary fluff.
45
-
46
- When addressing questions about ‘what is the best roof,’ consider the following factors:
47
- • Geography, aesthetic preferences, budget, frequency of weather-related hazards, roof cover performance, and how performance changes with age.
48
- • For the insurance industry, the ‘best roof’ depends on the specific hazards (their location and frequency), performance expectations and predictability, and the cost of materials.
49
-
50
- If none of the articles answer the question, simply say that there are no articles relevant to your inquiry.
51
- Remember, you must return both an answer and citations. A citation consists of a VERBATIM quote that
52
- justifies the answer and the ID and also Source Name of the quote article. Return a citation for every quote across all articles
53
- that justify the answer. Use the following format for your final output:
54
- <cited_answer>
55
- <answer></answer>
56
- <citations>
57
- <citation><source_id></source_id><source></source><quote></quote></citation>
58
- <citation><source_id></source_id><source></source><quote></quote></citation>
59
- ...
60
- </citations>
61
- </cited_answer>
62
- Here are the articles:{context}"""
63
-
64
- xml_prompt = ChatPromptTemplate.from_messages([("system", xml_system), ("human", "{input}")])
65
-
66
- ENV = os.getenv('ENV')
67
- HOST = os.getenv('ACCOUNT_HOST')
68
- MASTER_KEY = os.getenv('ACCOUNT_KEY')
69
- DATABASE_ID = os.getenv('COSMOS_DATABASE')
70
- CONTAINER_ID = os.getenv('COSMOS_CONTAINER')
71
- HISTORY_CONTAINER_ID = os.getenv('COSMOS_HISTORY_CONTAINER')
72
- client = cosmos_client.CosmosClient(HOST, {'masterKey': MASTER_KEY}, user_agent="CosmosDBPythonQuickstart", user_agent_overwrite=True)
73
- database = client.get_database_client(DATABASE_ID)
74
- container = database.get_container_client(CONTAINER_ID)
75
- history_container = database.get_container_client(HISTORY_CONTAINER_ID)
76
-
77
-
78
- def format_docs_xml(docs: List[Document]) -> str:
79
- formatted_docs = [
80
- f"<source id=\"{i}\">\n<source>{doc.metadata['source']}</source>\n<article_snippet>{doc.page_content}</article_snippet>\n</source>"
81
- for i, doc in enumerate(docs)
82
- ]
83
- return f"\n\n<sources>\n{chr(10).join(formatted_docs)}\n</sources>"
84
-
85
- def parse_output_with_error_handling(output):
86
- try:
87
- return XMLOutputParser()
88
- except Exception:
89
- # return {'answer':{'cited_answer': [{'answer': ''},{'citations': []}]}}
90
- return XMLOutputParser().parse("")
91
-
92
-
93
- llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=os.getenv("OPEN_API_KEY"))
94
-
95
-
96
- rag_chain_from_docs = (
97
- RunnablePassthrough.assign(context=(lambda x: format_docs_xml(x["context"])))
98
- | xml_prompt
99
- | llm
100
- | XMLOutputParser()
101
- )
102
-
103
- settings = Settings(persist_directory=PERSIST_DIRECTORY)
104
- vectordb = Chroma(embedding_function=OpenAIEmbeddings(api_key=os.getenv("OPEN_API_KEY")), persist_directory=PERSIST_DIRECTORY)
105
- retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": K_VALUE})
106
- retrieve_docs = (lambda x: x["input"]) | retriever
107
- chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
108
- answer=rag_chain_from_docs
109
- )
110
-
111
-
112
- pdf_loader = PyPDFLoader("refine.pdf")
113
- pdf_docs = pdf_loader.load()
114
-
115
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
116
- splits = text_splitter.split_documents(pdf_docs)
117
- pdf_vectorstore = InMemoryVectorStore.from_documents(
118
- documents=splits, embedding=OpenAIEmbeddings(api_key=os.getenv("OPEN_API_KEY"))
119
- )
120
-
121
- pdf_retriever = pdf_vectorstore.as_retriever()
122
-
123
- refine_system_prompt = (
124
- "You are an assistant for question-answering tasks. "
125
- "Use the following pieces of retrieved context to answer "
126
- "the question. provide clear, concise, and informed answers without unnecessary fluff. "
127
- "If you cannot answer the question with the retrieved context, only say that 'Nothing' "
128
- "\n\n"
129
- "{context}"
130
- )
131
-
132
- refine_prompt = ChatPromptTemplate.from_messages(
133
- [
134
- ("system", refine_system_prompt),
135
- ("human", "{input}"),
136
- ]
137
- )
138
-
139
- question_answer_chain = create_stuff_documents_chain(llm, refine_prompt)
140
- pdf_rag_chain = create_retrieval_chain(pdf_retriever, question_answer_chain)
141
-
142
- def get_article_info(df, file_name):
143
- title = df[df["file_name"] == file_name]["title"].iloc[0]
144
- link = df[df["file_name"] == file_name]["link"].iloc[0]
145
- return title, link
146
-
147
-
148
- df = pd.read_csv("articles_db.csv")
149
-
150
-
151
- def vectordb_search(query):
152
- titles, links = [], []
153
- question_search = retriever.invoke(query)
154
- for item in question_search:
155
- edited_item = item.metadata["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
156
- title, link = get_article_info(df, edited_item)
157
- if title not in titles:
158
- titles.append(title)
159
- # if link not in links:
160
- links.append(link)
161
- return "\n".join([f"- [{title}]({link})" for title, link in zip(titles, links)])
162
-
163
- RESPONSE_ID = ""
164
-
165
-
166
- def llm_response(query):
167
- global RESPONSE_ID
168
- RESPONSE_ID = str(uuid.uuid4())
169
- chat = {}
170
- titles, links, res_titles, res_links = [], [], [], []
171
- chat["id"] = RESPONSE_ID
172
- chat["partitionKey"] = "RoofingRoadmap"
173
- chat["user"] = query
174
- chat["env"] = ENV
175
-
176
- pdf_answer = pdf_rag_chain.invoke({"input": f"{query}"})
177
- try:
178
- if 'f wave' in query.lower() or 'f-wave' in query.lower() or 'fwave' in query.lower():
179
- query = query.replace('f wave', 'f-wave shingle').replace('f-wave', 'f-wave shingle').replace('fwave', 'f-wave shingle')
180
- result = chain.invoke({"input": query})
181
- if pdf_answer['answer'] == 'Nothing':
182
- answer = result['answer']['cited_answer'][0]["answer"]
183
- citations = result['answer']['cited_answer'][1]['citations']
184
- for citation in citations:
185
- try:
186
- edited_item = citation['citation'][1]["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
187
- title, link = get_article_info(df, edited_item)
188
- if title not in titles:
189
- titles.append(title)
190
- # if link not in links:
191
- links.append(link)
192
- except (TypeError, KeyError, IndexError):
193
- # Handle the error or simply pass if citation does not have the expected keys
194
- continue
195
- else:
196
- answer = pdf_answer['answer']
197
-
198
- if not result['answer']['cited_answer'][1]['citations']:
199
- answer_with_citations = f"{answer}"
200
- else:
201
- question_search = retriever.invoke(query)
202
- for res_item in question_search:
203
- edited_item = res_item.metadata["source"].replace("\\", "/").replace("Articles/", "").replace("Articles\\", "")
204
- res_title, res_link = get_article_info(df, edited_item)
205
- if res_title not in res_titles and res_title not in titles:
206
- res_titles.append(res_title)
207
- # if res_link not in res_links and res_link not in links:
208
- res_links.append(res_link)
209
-
210
-
211
- except OutputParserException as e:
212
- if pdf_answer['answer'] == 'Nothing':
213
- answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
214
- return answer
215
- else:
216
- answer = pdf_answer['answer']
217
-
218
- finally:
219
- chat["ai"] = answer
220
- chat["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
221
- container.create_item(body=chat)
222
-
223
- # Build the answer with superscript citations
224
- answer_with_citations = f"{answer}"
225
- for i, (title, link) in enumerate(zip(titles, links), start=1):
226
- answer_with_citations += f" <sup>[[{i}]({link})]</sup> "
227
-
228
- # Build the references section with clickable links
229
- if not links:
230
- markdown_list = f"{answer_with_citations}"
231
- else:
232
- citations_section = "\n\nCitations:\n" + "\n".join(
233
- [f"[{i}]: [{title}]({link})" for i, (title, link) in enumerate(zip(titles, links), start=1)]
234
- )
235
- markdown_list = f"{answer_with_citations}{citations_section}"
236
- # Combine answer and citations for final markdown output
237
-
238
-
239
- if not res_links:
240
- return markdown_list
241
- else:
242
- markdown_list += f"\n\n\nHere is a list of articles that can provide more information about your inquiry:\n"
243
- markdown_list += "\n".join([f"- [{res_title}]({res_link})" for res_title, res_link in zip(res_titles, res_links)])
244
-
245
- return markdown_list
246
-
247
- def vote(value, data: gr.LikeData):
248
- chat_vote = {}
249
- chat_vote["id"] = str(uuid.uuid4())
250
- chat_vote["chat_id"] = RESPONSE_ID
251
- chat_vote["partitionKey"] = "RoofingRoadmapVotes"
252
- chat_vote["response"] = data.value[0].split('<sup>', 1)[0].split('\n', 1)[0]
253
- chat_vote["env"] = ENV
254
- chat_vote["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
255
-
256
- if data.liked:
257
- chat_vote["vote"] = "upvote"
258
- else:
259
- chat_vote["vote"] = "downvote"
260
-
261
- history_container.create_item(body=chat_vote)
262
-
263
- def show_feedback_column(visible):
264
- if visible:
265
- # If visible, hide the column
266
- return gr.update(visible=False), gr.update(value=""), False
267
- else:
268
- # If not visible, show the column and clear the Textbox
269
- return gr.update(visible=True), "", True
270
-
271
- def user_feedback(value, ):
272
- chat_feedback = {}
273
- chat_feedback["id"] = str(uuid.uuid4())
274
- chat_feedback["chat_id"] = RESPONSE_ID
275
- chat_feedback["partitionKey"] = "RoofingRoadmapFeedback"
276
- chat_feedback["feedback"] = value
277
- chat_feedback["env"] = ENV
278
- chat_feedback["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
279
- history_container.create_item(body=chat_feedback)
280
- return gr.update(visible=False), ""
281
-
282
- with gr.Blocks() as demo:
283
- gr.Markdown("## Find literature to answer your question!")
284
- gr.Markdown("### Ask a question about the wind and hail performance of asphalt shingle, metal, and tile roofs.")
285
- with gr.Row():
286
- with gr.Column():
287
- chatbot = gr.Chatbot(type="messages", height=400)
288
- chatbot.like(vote, chatbot, None)
289
- msg = gr.Textbox(label="Hit the Enter to send your question", placeholder="What's on your mind?", show_copy_button=True)
290
- with gr.Row():
291
- send = gr.Button("Send", variant="secondary", scale=3)
292
- feedback = gr.Button("Feedback", variant="stop", scale=1)
293
- with gr.Column(visible=False, elem_id="feedback_column") as feedback_column:
294
- usr_msg = gr.Textbox(label="Submit feedback to IBHS", info="What went wrong?", placeholder="Give us as much detail as you can!", lines=3)
295
- usr_submit = gr.Button("Submit", variant="secondary")
296
-
297
-
298
-
299
- def user(user_message, history: list):
300
- return "", history + [{"role": "user", "content": user_message}]
301
-
302
-
303
- def bot(history: list):
304
- bot_message = llm_response(history[-1]['content'])
305
- history.append({"role": "assistant", "content": ""})
306
- for character in bot_message:
307
- history[-1]['content'] += character
308
- yield history
309
-
310
-
311
- feedback_column_state = gr.State(False)
312
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
313
- send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
314
- feedback.click(fn=show_feedback_column, inputs=[feedback_column_state], outputs=[feedback_column, usr_msg, feedback_column_state])
315
- usr_submit.click(user_feedback, usr_msg, outputs=[feedback_column, usr_msg])
316
- gr.Markdown("*Our chatbot is constantly learning and improving to better serve you!*")
317
- gr.Markdown("#### Additional questions? Contact IBHS Membership Manager Larry Scott at [lscott@ibhs.org]().")
318
-
319
- if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  demo.launch()
 
1
+ from langchain_openai import OpenAIEmbeddings
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_chroma import Chroma
4
+ import chromadb
5
+ from chromadb.config import Settings
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from typing import List
8
+
9
+ from langchain_core.documents import Document
10
+ from langchain_core.runnables import RunnablePassthrough
11
+ from langchain_core.output_parsers import XMLOutputParser
12
+ import gradio as gr
13
+ import pandas as pd
14
+ import logging
15
+ from langchain_core.exceptions import OutputParserException
16
+ import os
17
+ from dotenv import load_dotenv
18
+
19
+ from sympy.codegen.ast import continue_
20
+
21
+ import azure.cosmos.cosmos_client as cosmos_client
22
+ import azure.cosmos.exceptions as exceptions
23
+ from azure.cosmos.partition_key import PartitionKey
24
+
25
+ from langchain_community.document_loaders import PyPDFLoader
26
+ from langchain_core.vectorstores import InMemoryVectorStore
27
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
28
+ from langchain.chains import create_retrieval_chain
29
+ from langchain.chains.combine_documents import create_stuff_documents_chain
30
+ from langchain_core.runnables import RunnableLambda
31
+ import datetime
32
+ import uuid
33
+ import hashlib
34
load_dotenv()

# Constants
PERSIST_DIRECTORY = "chroma_store"  # on-disk location of the persistent Chroma vector store
K_VALUE = 5  # number of documents the retriever returns per query

# System prompt for the citation-producing RAG chain. The model is instructed to
# emit a <cited_answer> XML document (answer + verbatim citations) that the
# XMLOutputParser at the end of the chain can parse.
xml_system = """You're a helpful AI assistant. Given a user question and some scientific literature
documents which highlight research on different roof cover materials (e.g., asphalt shingles, metal, tile)
and their performance against natural hazards(e.g., wind, hail), answer the user
question.
You’re a helpful AI assistant. Given a user question and scientific literature on various roof cover materials (e.g., asphalt shingles, metal, tile)
and their performance against natural hazards (e.g., wind, hail), provide clear, concise, and informed answers without unnecessary fluff.

When addressing questions about ‘what is the best roof,’ consider the following factors:
• Geography, aesthetic preferences, budget, frequency of weather-related hazards, roof cover performance, and how performance changes with age.
• For the insurance industry, the ‘best roof’ depends on the specific hazards (their location and frequency), performance expectations and predictability, and the cost of materials.

If none of the articles answer the question, simply say that there are no articles relevant to your inquiry.
Remember, you must return both an answer and citations. A citation consists of a VERBATIM quote that
justifies the answer and the ID and also Source Name of the quote article. Return a citation for every quote across all articles
that justify the answer. Use the following format for your final output:
<cited_answer>
<answer></answer>
<citations>
<citation><source_id></source_id><source></source><quote></quote></citation>
<citation><source_id></source_id><source></source><quote></quote></citation>
...
</citations>
</cited_answer>
Here are the articles:{context}"""

xml_prompt = ChatPromptTemplate.from_messages([("system", xml_system), ("human", "{input}")])

# Deployment configuration from the environment (.env loaded above).
ENV = os.getenv('ENV')
HOST = os.getenv('ACCOUNT_HOST')
MASTER_KEY = os.getenv('ACCOUNT_KEY')
DATABASE_ID = os.getenv('COSMOS_DATABASE')
CONTAINER_ID = os.getenv('COSMOS_CONTAINER')
HISTORY_CONTAINER_ID = os.getenv('COSMOS_HISTORY_CONTAINER')

# Azure Cosmos DB handles: `container` stores chat transcripts, while
# `history_container` stores votes and free-form feedback.
client = cosmos_client.CosmosClient(HOST, {'masterKey': MASTER_KEY}, user_agent="CosmosDBPythonQuickstart", user_agent_overwrite=True)
database = client.get_database_client(DATABASE_ID)
container = database.get_container_client(CONTAINER_ID)
history_container = database.get_container_client(HISTORY_CONTAINER_ID)
76
+
77
+
78
def format_docs_xml(docs: List[Document]) -> str:
    """Render retrieved documents as the <sources> XML fragment the system prompt expects.

    Each document becomes an indexed <source id="i"> element carrying its
    metadata 'source' name and page content verbatim.
    """
    pieces = []
    for idx, doc in enumerate(docs):
        pieces.append(
            '<source id="{}">\n<source>{}</source>\n<article_snippet>{}</article_snippet>\n</source>'.format(
                idx, doc.metadata['source'], doc.page_content
            )
        )
    joined = "\n".join(pieces)
    return f"\n\n<sources>\n{joined}\n</sources>"
84
+
85
def parse_output_with_error_handling(output):
    """Parse the LLM's XML output, degrading gracefully on malformed XML.

    Parameters:
        output: raw text produced by the model (expected <cited_answer> XML).

    Returns:
        The structure produced by XMLOutputParser; when parsing fails (e.g.
        the model emitted non-XML text), the result of parsing an empty
        string, so callers get the parser's empty shape instead of an error.
    """
    parser = XMLOutputParser()
    try:
        # BUG FIX: the previous version returned the parser instance itself
        # (never calling .parse), so the try body could never raise and the
        # fallback branch was dead code.
        return parser.parse(output)
    except Exception:
        # return {'answer':{'cited_answer': [{'answer': ''},{'citations': []}]}}
        return parser.parse("")
91
+
92
+
93
# Chat model shared by both RAG chains; temperature 0 for deterministic answers.
# Relies on the OpenAI key from the environment loaded via load_dotenv().
llm = ChatOpenAI(model="gpt-4o", temperature=0)


# Chain that formats retrieved docs as XML, prompts the model, and parses the
# <cited_answer> XML response into a nested dict/list structure.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs_xml(x["context"])))
    | xml_prompt
    | llm
    | XMLOutputParser()
)

# Persistent Chroma store over the article corpus; MMR search diversifies hits.
# NOTE(review): `settings` is never passed to Chroma — confirm it is still needed.
settings = Settings(persist_directory=PERSIST_DIRECTORY)
vectordb = Chroma(embedding_function=OpenAIEmbeddings(), persist_directory=PERSIST_DIRECTORY)
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": K_VALUE})
retrieve_docs = (lambda x: x["input"]) | retriever
# Full article-RAG chain: {"input": ...} -> {"input", "context", "answer"}.
chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain_from_docs
)


# Secondary in-memory RAG chain over a single curated PDF ("refine.pdf");
# consulted first by llm_response and prompted to answer 'Nothing' when it
# has no answer (that sentinel is matched downstream).
pdf_loader = PyPDFLoader("refine.pdf")
pdf_docs = pdf_loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
splits = text_splitter.split_documents(pdf_docs)
pdf_vectorstore = InMemoryVectorStore.from_documents(
    documents=splits, embedding=OpenAIEmbeddings()
)

pdf_retriever = pdf_vectorstore.as_retriever()

# Prompt for the PDF chain; the literal 'Nothing' is the sentinel checked in
# llm_response to decide which chain's answer to surface.
refine_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. provide clear, concise, and informed answers without unnecessary fluff. "
    "If you cannot answer the question with the retrieved context, only say that 'Nothing' "
    "\n\n"
    "{context}"
)

refine_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", refine_system_prompt),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, refine_prompt)
pdf_rag_chain = create_retrieval_chain(pdf_retriever, question_answer_chain)
141
+
142
def get_article_info(df, file_name):
    """Look up an article's display title and URL by its file name.

    Parameters:
        df: pandas DataFrame with at least 'file_name', 'title' and 'link' columns.
        file_name: normalized file name (path prefixes already stripped by callers).

    Returns:
        (title, link) from the first matching row.

    Raises:
        IndexError: if no row matches `file_name` (llm_response catches this
            per-citation; vectordb_search lets it propagate).
    """
    # Filter once and take the first match, instead of scanning the frame
    # twice (once for title, once for link) as before.
    row = df.loc[df["file_name"] == file_name].iloc[0]
    return row["title"], row["link"]
146
+
147
+
148
# Article metadata (file_name -> title/link) used to build citation links.
df = pd.read_csv("articles_db.csv")
149
+
150
+
151
def vectordb_search(query):
    """Return a markdown bullet list of unique articles relevant to `query`.

    Runs the Chroma MMR retriever, maps each hit's source file name to its
    title/link via the articles CSV, and de-duplicates by title (the link is
    only recorded alongside a newly seen title, keeping the lists aligned).
    """
    titles, links = [], []
    question_search = retriever.invoke(query)
    for item in question_search:
        # Strip the corpus directory prefix so the name matches articles_db.csv.
        edited_item = item.metadata["source"].replace("Articles/", "")
        title, link = get_article_info(df, edited_item)
        if title not in titles:
            titles.append(title)
            # if link not in links:
            links.append(link)
    return "\n".join([f"- [{title}]({link})" for title, link in zip(titles, links)])
162
+
163
+
164
def initialize_session(session_id):
    """Return the existing session id, or mint a fresh UUID4 string when it is None."""
    return str(uuid.uuid4()) if session_id is None else session_id
169
+
170
+
171
def llm_response(query, session_id):
    """Answer `query` and return a markdown string for the chatbot.

    Flow: the PDF chain is consulted first; when it answers the sentinel
    'Nothing', the article-RAG chain's cited answer is used and citation
    titles/links are collected. Every exchange is persisted to Cosmos DB
    (in `finally`), then the answer is decorated with superscript citation
    links, a Citations section, and optionally a "more articles" list.

    Parameters:
        query: the user's question.
        session_id: gradio per-session id (minted here if still None).
    """
    print("session_id", session_id)
    chat = {}
    titles, links, res_titles, res_links = [], [], [], []
    session_id = initialize_session(session_id)
    chat["id"] = str(uuid.uuid4())
    chat["chat_id"] = session_id
    chat["partitionKey"] = "RoofingRoadmap"
    chat["user"] = query
    chat["env"] = ENV

    pdf_answer = pdf_rag_chain.invoke({"input": f"{query}"})
    try:
        # Normalize all spellings of the F-Wave product name before retrieval.
        if 'f wave' in query.lower() or 'f-wave' in query.lower() or 'fwave' in query.lower():
            query = query.replace('f wave', 'f-wave shingle').replace('f-wave', 'f-wave shingle').replace('fwave', 'f-wave shingle')
        result = chain.invoke({"input": query})
        if pdf_answer['answer'] == 'Nothing':
            # PDF chain had no answer: use the article chain's cited answer.
            answer = result['answer']['cited_answer'][0]["answer"]
            citations = result['answer']['cited_answer'][1]['citations']
            for citation in citations:
                try:
                    # Map each citation's source file to its article title/link.
                    edited_item = citation['citation'][1]["source"].replace("Articles/", "")
                    title, link = get_article_info(df, edited_item)
                    if title not in titles:
                        titles.append(title)
                        # if link not in links:
                        links.append(link)
                except (TypeError, KeyError, IndexError):
                    # Handle the error or simply pass if citation does not have the expected keys
                    continue
        else:
            # PDF chain answered; its answer wins.
            answer = pdf_answer['answer']

            if not result['answer']['cited_answer'][1]['citations']:
                # NOTE(review): dead store — answer_with_citations is rebuilt
                # unconditionally after the try/finally below.
                answer_with_citations = f"{answer}"
            else:
                # Article chain also found citations: collect those articles
                # as additional suggestions (res_titles/res_links).
                question_search = retriever.invoke(query)
                for res_item in question_search:
                    edited_item = res_item.metadata["source"].replace("Articles/", "")
                    res_title, res_link = get_article_info(df, edited_item)
                    if res_title not in res_titles and res_title not in titles:
                        res_titles.append(res_title)
                        # if res_link not in res_links and res_link not in links:
                        res_links.append(res_link)


    except OutputParserException as e:
        # The article chain's XML output failed to parse.
        if pdf_answer['answer'] == 'Nothing':
            answer = "Your search is beyond the scope of this tool at this time. Please explore the rest of [IBHS website](https://ibhs.org) to find research on this topic."
            # Early return; the finally block below still logs this exchange.
            return answer
        else:
            answer = pdf_answer['answer']

    finally:
        # Persist the exchange regardless of which branch produced `answer`.
        # NOTE(review): timestamp is local time, not UTC — confirm intended.
        chat["ai"] = answer
        chat["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        container.create_item(body=chat)

    # Build the answer with superscript citations
    answer_with_citations = f"{answer}"
    for i, (title, link) in enumerate(zip(titles, links), start=1):
        answer_with_citations += f" <sup>[[{i}]({link})]</sup> "

    # Build the references section with clickable links
    if not links:
        markdown_list = f"{answer_with_citations}"
    else:
        citations_section = "\n\nCitations:\n" + "\n".join(
            [f"[{i}]: [{title}]({link})" for i, (title, link) in enumerate(zip(titles, links), start=1)]
        )
        markdown_list = f"{answer_with_citations}{citations_section}"
    # Combine answer and citations for final markdown output


    if not res_links:
        return markdown_list
    else:
        markdown_list += f"\n\n\nHere is a list of articles that can provide more information about your inquiry:\n"
        markdown_list += "\n".join([f"- [{res_title}]({res_link})" for res_title, res_link in zip(res_titles, res_links)])

    return markdown_list
252
+
253
def vote(value, data: gr.LikeData, session_id: str = None):
    """Record an up/down vote on a chatbot response in Cosmos DB."""
    session_id = initialize_session(session_id)
    print("session_id", session_id)
    # First line of the response, stripped of superscript citation markup,
    # identifies which answer was voted on.
    voted_text = data.value[0].split('<sup>', 1)[0].split('\n', 1)[0]
    chat_vote = {
        "id": str(uuid.uuid4()),
        "chat_id": session_id,
        "partitionKey": "RoofingRoadmapVotes",
        "response": voted_text,
        "env": ENV,
        "timestamp": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "vote": "upvote" if data.liked else "downvote",
    }
    history_container.create_item(body=chat_vote)
270
+
271
def show_feedback_column(visible):
    """Toggle the feedback column.

    Returns a (column update, textbox value/update, new visibility state)
    triple for the gradio outputs wired to the Feedback button.
    """
    if not visible:
        # Opening: show the column with an empty textbox.
        return gr.update(visible=True), "", True
    # Closing: hide the column and clear the textbox via an update object.
    return gr.update(visible=False), gr.update(value=""), False
278
+
279
def user_feedback(value, session_id):
    """Persist free-form user feedback to Cosmos DB, then hide the feedback UI.

    Returns (column update, cleared textbox value, session id) for gradio.
    """
    session_id = initialize_session(session_id)
    print("session_id", session_id)
    chat_feedback = {
        "id": str(uuid.uuid4()),
        "chat_id": session_id,
        "partitionKey": "RoofingRoadmapFeedback",
        "feedback": value,
        "env": ENV,
        "timestamp": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }
    history_container.create_item(body=chat_feedback)
    # Hide the feedback column, clear the textbox, and carry the session forward.
    return gr.update(visible=False), "", session_id
291
+
292
# Gradio UI: chatbot with send/feedback buttons, per-session state, and
# vote/feedback persistence wired to the Cosmos-backed handlers above.
with gr.Blocks() as demo:
    # Create a state variable; this is unique per session.
    session_id = gr.State(None)

    gr.Markdown("## Find literature to answer your question!")
    gr.Markdown("### Ask a question about the wind and hail performance of asphalt shingle, metal, and tile roofs.")
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot(type="messages", height=400)
            # Thumbs up/down on a message records a vote (see vote()).
            chatbot.like(vote, [chatbot, session_id], None)
            msg = gr.Textbox(label="Hit the Enter to send your question", placeholder="What's on your mind?", show_copy_button=True)
            with gr.Row():
                send = gr.Button("Send", variant="secondary", scale=3)
                feedback = gr.Button("Feedback", variant="stop", scale=1)
            with gr.Column(visible=False, elem_id="feedback_column") as feedback_column:
                usr_msg = gr.Textbox(label="Submit feedback to IBHS", info="What went wrong?", placeholder="Give us as much detail as you can!", lines=3)
                usr_submit = gr.Button("Submit", variant="secondary")



    def user(user_message, history: list):
        # Append the user's turn to the history and clear the textbox.
        return "", history + [{"role": "user", "content": user_message}]


    def bot(history: list, session_id_i):
        # Generator: streams the assistant's answer character by character.
        if session_id_i is None:
            session_id_i = initialize_session(session_id_i)
        bot_message = llm_response(history[-1]['content'], session_id_i)
        history.append({"role": "assistant", "content": ""})
        for character in bot_message:
            history[-1]['content'] += character
            yield history, session_id_i


    feedback_column_state = gr.State(False)
    # Enter key and Send button trigger the same user -> bot pipeline.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, [chatbot, session_id], [chatbot,session_id])
    send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, [chatbot, session_id], [chatbot, session_id])
    feedback.click(fn=show_feedback_column, inputs=[feedback_column_state], outputs=[feedback_column, usr_msg, feedback_column_state])
    usr_submit.click(user_feedback, [usr_msg, session_id], outputs=[feedback_column, usr_msg, session_id])
    gr.Markdown("*Our chatbot is constantly learning and improving to better serve you!*")
    gr.Markdown("#### Additional questions? Contact IBHS Membership Manager Larry Scott at [lscott@ibhs.org]().")

if __name__ == "__main__":
    demo.launch()