joshremis committed on
Commit
0a938e3
·
verified ·
1 Parent(s): 63f0562

Upload 6 files

Browse files
Files changed (6) hide show
  1. .env +3 -0
  2. .gitignore +2 -0
  3. app.py +149 -0
  4. bot.jpg +0 -0
  5. embed.py +98 -0
  6. user.png +0 -0
.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # SECURITY: live credentials must never be committed — rotate both keys and supply them via the environment.
+ OPENAI_API_KEY=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
+ INDEX=chatbot
+ PINECONE_API_KEY=8d2d8b7b-7d1a-4cc6-aff3-cf9d64c326d0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ content
2
+ .env
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import random
import time

import gradio as gr
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from openai import OpenAI
from pinecone import Pinecone

# Fix: removed duplicate imports (`Document`, `os` were each imported twice)
# and the unused `Chroma` / `PineconeVectorStore` imports.

load_dotenv()

# Credentials and index name are read from .env; OPENAI_API_KEY must be the
# exact (case-sensitive) variable name in that file.
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_index = os.getenv("INDEX")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

# Pinecone metadata field names and the user-facing labels they map to
# (kept in lock-step by position).
metadata_list = ['fullname', 'mediator email', 'mediator profile on mediate.com', 'mediator Biography']
metadata_value = ['Name', "Email", "Profile", "Biography"]

# Shared clients: one embedding model for queries, one raw OpenAI client
# for the tool-call extraction step.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
openai_client = OpenAI(api_key=openai_api_key)
32
def _extract_mediator_count(message):
    """Ask GPT-4 (via a forced tool schema) how many mediators the user wants.

    Returns an int; falls back to 1 when the model emits no tool call
    (the original code crashed with AttributeError in that case).
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "mediator_search",
                "description": "Extract how many mediators user want to search.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "mediator": {
                            "type": "number",
                            "description": "The number of mediators that user want to search",
                            "default": 1,
                        }
                    },
                    "required": ["mediator"],
                },
            },
        }
    ]

    response = openai_client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": "Please extract how many mediators users want to search."},
            {"role": "user", "content": message},
        ],
        tools=tools,
    )

    tool_calls = response.choices[0].message.tool_calls
    if not tool_calls:
        # Robustness fix: the model is not guaranteed to call the tool.
        return 1
    # JSON "number" may come back as a float; coerce for slicing below.
    return int(json.loads(tool_calls[0].function.arguments)['mediator'])


def search(message, history):
    """Chat handler for gr.ChatInterface.

    Queries Pinecone for mediators matching `message`, formats the top
    matches, then asks GPT-4 (via a stuff QA chain) to explain why the
    matches are relevant.

    Args:
        message: the user's latest chat message.
        history: prior chat turns supplied by Gradio (unused here; the
            chain keeps its own ConversationBufferMemory).

    Returns:
        A formatted string listing the matched mediators followed by the
        model's explanation.
    """
    mediator_num = _extract_mediator_count(message)
    print(mediator_num)

    # Prompt used by the QA chain to justify each recommendation.
    template = (
        "You are a professional mediator information analyzer. You have to "
        "write why the following context is related to human's message. "
        "Please write 3 or 4 sentences."
        "Context: {context}\n"
        "Chat history: {chat_history}\n"
        "Human: {human_input}\n"
        "Your Response as Chatbot:"
    )
    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input", "context"],
        template=template,
    )
    memory = ConversationBufferMemory(memory_key="chat_history", input_key="human_input")

    print(message)
    start_time = time.time()

    pc = Pinecone(api_key=pinecone_api_key)
    # Fix: renamed from `index`, which was later shadowed by the enumerate
    # loop variable; also reuse the module-level `embeddings` instead of
    # constructing a second OpenAIEmbeddings client per request.
    pinecone_idx = pc.Index(pinecone_index)

    results = pinecone_idx.query(
        vector=embeddings.embed_query(message),
        top_k=800,
        include_metadata=True,
    )
    print("Search Time =>", time.time() - start_time)

    # Keep only confident matches and strip HTML out of their metadata.
    new_data = []
    for result in results['matches']:
        if result['score'] > 0.75:
            new_data.append({
                metadata: BeautifulSoup(result['metadata'][metadata], "html.parser").get_text()
                for metadata in metadata_list
            })

    print(len(new_data))
    random.shuffle(new_data)

    answer = ""
    new_docs = []
    for i, new_datum in enumerate(new_data[:mediator_num]):
        answer += f"{i + 1}\n"
        content = ""
        for metadata_index, metadata in enumerate(metadata_list):
            line = f"{metadata_value[metadata_index]}: {new_datum[metadata]} \n"
            content += line
            answer += line
        answer += "\n\n"
        # BUG FIX: each Document previously received the *cumulative*
        # `answer` string, so document i duplicated mediators 1..i-1 in the
        # chain's context; pass only this mediator's own content.
        new_docs.append(Document(page_content=content))

    chat_openai = ChatOpenAI(model='gpt-4-1106-preview',
                             openai_api_key=openai_api_key)

    chain = load_qa_chain(chat_openai, chain_type="stuff", prompt=prompt, memory=memory)
    start_time = time.time()
    output = chain({"input_documents": new_docs, "human_input": message}, return_only_outputs=False)
    print("Query Time =>", time.time() - start_time)

    answer += f"Why appropriate: {output['output_text']}"
    return answer
143
+
144
+ chatbot = gr.Chatbot(avatar_images=["user.png", "bot.jpg"], height=600)
145
+
146
+ demo = gr.ChatInterface(fn=search, title="Mediate.com Chatbot Prototype", multimodal=False, retry_btn=None, clear_btn=None, undo_btn=None, chatbot=chatbot)
147
+
148
+ if __name__ == "__main__":
149
+ demo.launch(debug=True)
bot.jpg ADDED
embed.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import csv
import os
from pathlib import Path
from typing import Dict, List, Optional

import streamlit as st
from dotenv import load_dotenv
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Credentials and index name come from the local .env file.
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_index = os.getenv("INDEX")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

# Embedding model used when upserting documents into Pinecone.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
20
+
21
class MetaDataCSVLoader(BaseLoader):
    """CSV loader that copies selected columns into each Document's metadata.

    Behaves like a stock CSV loader (one Document per row, `key: value`
    lines as page content) but additionally stores every column listed in
    ``metadata_columns`` in the Document's metadata dict.
    """

    def __init__(
        self,
        file_path: str,
        source_column: Optional[str] = None,
        metadata_columns: Optional[List[str]] = None,
        content_columns: Optional[List[str]] = None,
        csv_args: Optional[Dict] = None,
        encoding: Optional[str] = None,
    ):
        # file_path: CSV to load.
        # source_column: column whose value becomes metadata["source"];
        #   when None, the file path is used instead.
        # metadata_columns: columns to mirror into Document metadata.
        # content_columns: when given, restricts page content to these columns.
        # csv_args: extra kwargs forwarded to csv.DictReader.
        # encoding: text encoding for the file.
        self.file_path = file_path
        self.source_column = source_column
        self.encoding = encoding
        self.csv_args = csv_args or {}
        self.content_columns = content_columns
        self.metadata_columns = metadata_columns

    @staticmethod
    def _clean(value) -> str:
        """Strip a CSV cell, tolerating the None that DictReader emits for
        rows shorter than the header (the original crashed with
        AttributeError on such rows)."""
        return "" if value is None else value.strip()

    def load(self) -> List[Document]:
        """Read the CSV and return one Document per row."""
        docs = []
        with open(self.file_path, newline="", encoding=self.encoding) as csvfile:
            csv_reader = csv.DictReader(csvfile, **self.csv_args)  # type: ignore
            for i, row in enumerate(csv_reader):
                # Rows longer than the header land under a None rest-key;
                # skip that key so k.strip() cannot crash.
                cells = [(k, v) for k, v in row.items() if k is not None]
                if self.content_columns:
                    content = "\n".join(
                        f"{k.strip()}: {self._clean(v)}" for k, v in cells if k in self.content_columns
                    )
                else:
                    content = "\n".join(f"{k.strip()}: {self._clean(v)}" for k, v in cells)
                try:
                    source = (
                        row[self.source_column]
                        if self.source_column is not None
                        else self.file_path
                    )
                except KeyError:
                    raise ValueError(
                        f"Source column '{self.source_column}' not found in CSV file."
                    )
                metadata = {"source": source, "row": i}
                # Mirror the requested columns into metadata.
                if self.metadata_columns:
                    for k, v in row.items():
                        if k in self.metadata_columns:
                            metadata[k] = v
                doc = Document(page_content=content, metadata=metadata)
                docs.append(doc)

        return docs
68
+
69
+
70
# Streamlit upload flow: persist the CSV locally, then embed its rows into
# the configured Pinecone index.
csv_file_uploaded = st.file_uploader(label="Upload your CSV File here")

if csv_file_uploaded is not None:
    def save_file_to_folder(uploadedFile):
        """Write the uploaded file into the local 'content' folder and
        report success in the UI."""
        save_folder = 'content'
        save_path = Path(save_folder, uploadedFile.name)
        with open(save_path, mode='wb') as w:
            w.write(uploadedFile.getvalue())

        if save_path.exists():
            st.success(f'File {uploadedFile.name} is successfully saved!')

    # BUG FIX: the file must be written to disk *before* it is opened
    # below. The original called save_file_to_folder() as the very last
    # statement, so the first upload raised FileNotFoundError.
    save_file_to_folder(csv_file_uploaded)

    saved_path = os.path.join('content/', csv_file_uploaded.name)

    with open(saved_path, 'r') as file:
        csv_reader = csv.reader(file)
        # Read the header row; drop empty column names so they are not
        # requested as metadata.
        headers = next(csv_reader)
        filtered_headers = list(filter(lambda x: x != '', headers))

    loader = MetaDataCSVLoader(saved_path,
                               metadata_columns=filtered_headers, encoding="utf-8")
    data = loader.load()

    # Embed every row document and upsert into the Pinecone index.
    PineconeVectorStore.from_documents(data, embeddings, index_name=pinecone_index)
user.png ADDED