David319193 committed on
Commit
fb54f5e
·
verified ·
1 Parent(s): cdac3fe

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +196 -0
  2. bot.jpg +0 -0
  3. embed.py +116 -0
  4. requirements.txt +5 -0
  5. user.png +0 -0
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import time
4
+
5
+ from langchain.docstore.document import Document
6
+ from langchain.memory import ConversationBufferMemory
7
+ from langchain.prompts import PromptTemplate
8
+ from pinecone import Pinecone
9
+ from langchain_pinecone import PineconeVectorStore
10
+ from langchain_openai import OpenAIEmbeddings
11
+ from langchain.chains.question_answering import load_qa_chain
12
+ from langchain_openai import ChatOpenAI
13
+ from langchain_community.vectorstores import Chroma
14
+ from langchain.docstore.document import Document
15
+ from langchain.chains.summarize import load_summarize_chain
16
+
17
+ from openai import OpenAI
18
+ from dotenv import load_dotenv
19
+ import os, random, json
20
+ from bs4 import BeautifulSoup
21
+
22
+ load_dotenv()
23
+
24
+ openai_api_key = os.getenv("OPENAI_API_KEY")
25
+ pinecone_index = os.getenv("INDEX")
26
+ pinecone_api_key = os.getenv("PINECONE_API_KEY")
27
+
28
+ metadata_list = ['fullname', 'mediator email', 'mediator profile on mediate.com', 'mediator Biography', 'mediator state', 'mediator areas of practice']
29
+ metadata_value = ['Name', "Email", "Profile", "Biography", "State", "Practice"]
30
+
31
+ embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
32
+ openai_client = OpenAI(api_key=openai_api_key)
33
+
34
def getMetadata(message):
    """Extract location filters (country/city/state) from a user message.

    Uses OpenAI tool-calling with a single ``get_info`` function schema and
    returns the raw JSON-encoded arguments string produced by the model;
    callers are expected to ``json.loads`` it. All three fields are optional
    in the schema, so the decoded dict may be empty.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_info",
                "description": "Extract the information of mediator",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "country": {
                            "type": "string",
                            "description": "Extract mediator's country that user want to search."
                        },
                        "city": {
                            "type": "string",
                            "description": "Extract mediator's city that user want to search."
                        },
                        "state": {
                            "type": "string",
                            "description": "Extract mediator's state that user want to search."
                        }
                    }
                },
            }
        }
    ]

    response = openai_client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            # Fixed copy-paste leftover: the prompt previously said
            # "astronomic assistant", which is the wrong domain for this bot.
            {"role": "system", "content": "You are a helpful assistant. Your role is to extract information about mediator from user's message."},
            {"role": "user", "content": message}
        ],
        tools=tools,
        # Force the model to call get_info so message.tool_calls is never
        # None below (previously an AttributeError when the model answered
        # in plain text instead of calling the tool).
        tool_choice={"type": "function", "function": {"name": "get_info"}},
    )

    return response.choices[0].message.tool_calls[0].function.arguments
72
+
73
def _extract_mediator_count(message):
    """Ask OpenAI (via tool-calling) how many mediators the user wants.

    Returns the number extracted from *message*; the tool schema declares a
    default of 1 and marks the field required.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "mediator_search",
                "description": "Extract how many mediators user want to search.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "mediator": {
                            "type": "number",
                            "description": "The number of mediators that user want to search",
                            "default": 1
                        }
                    },
                    "required": ["mediator"]
                }
            }
        }
    ]
    response = openai_client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": "Please extract how many mediators users want to search."},
            {"role": "user", "content": message}
        ],
        tools=tools,
    )
    number_str = response.choices[0].message.tool_calls[0].function.arguments
    return json.loads(number_str)['mediator']


def _query_matching_mediators(message):
    """Embed *message*, query Pinecone, and return metadata dicts of strong matches.

    Keeps only matches scoring above 0.85; the result list is shuffled so
    repeated queries surface different mediators.
    """
    pc = Pinecone(api_key=pinecone_api_key)
    pinecone_idx = pc.Index(pinecone_index)

    start_time = time.time()
    # Reuses the module-level `embeddings` client instead of re-creating it
    # on every request as the original did.
    results = pinecone_idx.query(
        vector=embeddings.embed_query(message),
        top_k=748,  # NOTE(review): magic number — presumably the index size; confirm
        include_metadata=True
    )
    print("Search Time =>", time.time() - start_time)

    matches = []
    for result in results['matches']:
        print(result['score'])
        if result['score'] > 0.85:
            matches.append({field: result['metadata'][field] for field in metadata_list})
    print(len(matches))
    random.shuffle(matches)
    return matches


def search(message, history):
    """Gradio ChatInterface handler.

    Returns up to N matching mediator records (N extracted from the message)
    followed by a short LLM-written explanation of why the matches fit.
    *history* is supplied by gr.ChatInterface and is not used directly; chat
    memory is handled by ConversationBufferMemory instead.
    """
    # Extracted location filters are currently only logged, never applied to
    # the Pinecone query — behaviour kept from the original; wiring them in
    # as a metadata filter would be the natural next step.
    print(json.loads(getMetadata(message=message)))

    mediator_num = _extract_mediator_count(message)
    print(mediator_num)

    # Prompt for the "why is this relevant" explanation. The instruction and
    # the context header were concatenated without a separator originally;
    # preserved as-is.
    template = (
        "You are a professional mediator information analyzer. You have to write why the following context is related to human's message. Please write 3 or 4 sentences."
        "Context: {context}\n"
        "Chat history: {chat_history}\n"
        "Human: {human_input}\n"
        "Your Response as Chatbot:"
    )
    qa_prompt = PromptTemplate(
        input_variables=["chat_history", "human_input", "context"],
        template=template
    )
    memory = ConversationBufferMemory(memory_key="chat_history", input_key="human_input")

    print(message)
    matches = _query_matching_mediators(message)

    answer = ""
    docs = []
    for position, mediator in enumerate(matches[:mediator_num]):
        answer += f"{position+1}\n"
        content = ""
        for label, field in zip(metadata_value, metadata_list):
            line = f"{label}: {mediator[field]} \n"
            content += line
            answer += line
        answer += "\n\n"
        # Fixed: each Document previously received the *cumulative* `answer`
        # (duplicating every earlier mediator) while the per-mediator
        # `content` string was built and discarded.
        docs.append(Document(page_content=content))

    chat_openai = ChatOpenAI(model='gpt-4-1106-preview',
                             openai_api_key=openai_api_key)

    chain = load_qa_chain(chat_openai, chain_type="stuff", prompt=qa_prompt, memory=memory)
    start_time = time.time()
    output = chain({"input_documents": docs, "human_input": message}, return_only_outputs=False)
    print("Query Time =>", time.time() - start_time)

    answer += f"Why appropriate: {output['output_text']}"
    return answer
190
+
191
# Chat widget: avatar images live alongside app.py; fixed 600px height.
chatbot = gr.Chatbot(avatar_images=["user.png", "bot.jpg"], height=600)

# Text-only chat wired to `search`; retry/clear/undo buttons are disabled.
demo = gr.ChatInterface(
    fn=search,
    title="Mediate.com Chatbot Prototype",
    multimodal=False,
    retry_btn=None,
    clear_btn=None,
    undo_btn=None,
    chatbot=chatbot,
)

if __name__ == "__main__":
    demo.launch(debug=True)
bot.jpg ADDED
embed.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# embed.py — Streamlit utility that ingests a mediator CSV, summarizes each
# biography with GPT-4, and upserts the resulting documents into Pinecone.

import csv
import os
from pathlib import Path
from typing import Dict, List, Optional

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain_community.document_loaders import WebBaseLoader  # NOTE(review): appears unused here
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Pull OPENAI_API_KEY / INDEX / PINECONE_API_KEY from a local .env file.
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_index = os.getenv("INDEX")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# CSV columns whose values are re-assembled into each document's page_content.
content_list = ["mediator country", "mediator city", "mediator state", "mediator zip code", "mediator areas of practice"]
25
+
26
def summarize(text):
    """Return a GPT-4 summary of *text* via a LangChain "stuff" chain."""
    model = ChatOpenAI(temperature=0, model_name="gpt-4-1106-preview", api_key=openai_api_key)
    summarizer = load_summarize_chain(model, chain_type="stuff")
    return summarizer.run([Document(page_content=text)])
31
+
32
class MetaDataCSVLoader(BaseLoader):
    """CSV loader that additionally copies selected columns into metadata.

    Behaves like LangChain's stock CSV loader — one Document per row, with
    ``source`` and ``row`` metadata — but also mirrors every column named in
    *metadata_columns* into the Document's metadata, and optionally restricts
    the page content to *content_columns*.
    """

    def __init__(
        self,
        file_path: str,
        source_column: Optional[str] = None,
        metadata_columns: Optional[List[str]] = None,
        content_columns: Optional[List[str]] = None,
        csv_args: Optional[Dict] = None,
        encoding: Optional[str] = None,
    ):
        # Path to the CSV file to load.
        self.file_path = file_path
        # Optional column whose value becomes each Document's "source";
        # falls back to the file path when unset.
        self.source_column = source_column
        self.encoding = encoding
        self.csv_args = csv_args or {}
        # Columns used for page_content (all columns when None/empty).
        self.content_columns = content_columns
        # Columns mirrored into Document.metadata.
        self.metadata_columns = metadata_columns

    def load(self) -> List[Document]:
        """Read the CSV and return one Document per data row."""
        documents = []
        with open(self.file_path, newline="", encoding=self.encoding) as handle:
            reader = csv.DictReader(handle, **self.csv_args)  # type: ignore
            for row_number, row in enumerate(reader):
                wanted = self.content_columns
                content = "\n".join(
                    f"{key.strip()}: {value.strip()}"
                    for key, value in row.items()
                    if not wanted or key in wanted
                )

                if self.source_column is None:
                    source = self.file_path
                else:
                    try:
                        source = row[self.source_column]
                    except KeyError:
                        raise ValueError(
                            f"Source column '{self.source_column}' not found in CSV file."
                        )

                metadata = {"source": source, "row": row_number}
                # Mirror the requested columns into metadata (the extension
                # over the stock loader).
                if self.metadata_columns:
                    for key, value in row.items():
                        if key in self.metadata_columns:
                            metadata[key] = value

                documents.append(Document(page_content=content, metadata=metadata))

        return documents
79
+
80
+
81
# --- Streamlit page body -----------------------------------------------------
csv_file_uploaded = st.file_uploader(label="Upload your CSV File here")

if csv_file_uploaded is not None:

    def save_file_to_folder(uploadedFile):
        """Persist the upload under ./content, then embed it into Pinecone."""
        save_folder = 'content'
        save_path = Path(save_folder, uploadedFile.name)
        with open(save_path, mode='wb') as w:
            w.write(uploadedFile.getvalue())

        if save_path.exists():
            st.success(f'File {uploadedFile.name} is successfully saved!')

        # NOTE(review): indentation was lost in the source under review; the
        # ingestion below is placed inside this function, after the save, as
        # that is the only ordering in which the read of content/<name>
        # cannot fail — confirm against the original file.
        with open(os.path.join('content/', csv_file_uploaded.name), 'r') as file:
            csv_reader = csv.reader(file)
            # Read the headers from the CSV file
            headers = next(csv_reader)
            filtered_headers = list(filter(lambda x: x != '', headers))

        # Every non-empty header becomes a metadata column on each Document.
        loader = MetaDataCSVLoader(os.path.join('content/', csv_file_uploaded.name),
                                   metadata_columns=filtered_headers, encoding="utf-8")
        data = loader.load()

        # Rebuild page_content from the location/practice columns and replace
        # each long biography with a GPT-4 summary before upserting.
        for datum in data:
            new_content = ""
            for content in content_list:
                new_content += f"{content}: {datum.metadata[content]}\n"

            datum.page_content = new_content
            datum.metadata['mediator Biography'] = summarize(datum.metadata['mediator Biography'])

        PineconeVectorStore.from_documents(data, embeddings, index_name=pinecone_index)

    save_file_to_folder(csv_file_uploaded)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ langchain==0.1.16
2
+ openai==1.13.3
3
+ pinecone-client==3.1.0
4
+ langchain-pinecone==0.0.3
5
+ langchain-openai==0.0.8
user.png ADDED