genesisapilist committed on
Commit
e326f71
·
1 Parent(s): a06b2b8

Initial Commit

Browse files
Files changed (6) hide show
  1. app.py +60 -0
  2. chat.py +181 -0
  3. gameload.py +72 -0
  4. helper.py +23 -0
  5. prompt_response.txt +13 -0
  6. requirements.txt +77 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chat
2
+ import gradio
3
+
4
def initialize_game(game_id, user_id, user_input):
    """Gradio entry point: delegate game initialization to the chat module."""
    return chat.initialize_game(game_id, user_id, user_input)
7
+
8
def play_game(game_id, user_id, user_input):
    """Gradio entry point: forward one conversational turn to chat.start_game."""
    return chat.start_game(game_id, user_id, user_input)
11
+
12
def health_check(name):
    """Liveness probe: return a greeting message confirming the site is up."""
    greeting = f"Hello {name}! The site is up"
    return {"role": "assistant", "content": greeting}
15
+
16
def generate_image_prompt(game_id, user_id, user_input):
    """Gradio entry point: delegate image-prompt generation to the chat module."""
    return chat.generate_image_prompt(game_id, user_id, user_input)
19
+
20
+
21
# Legacy Flask endpoint kept for reference only; loading is now done by
# running gameload.py directly.
# @app.route('/load_game', methods=['GET'])
# def load_game():
# upload_game_docs()
# response = {'message': 'Game loaded'}
# return jsonify(response)

# --- Gradio UI wiring (module level, executes on import) ---

# Developer-only tab to verify the service is reachable.
health_check_gr = gradio.Interface(
    fn=health_check,
    inputs="text",
    outputs="text",
    title="DEVELOPERS ONLY - Health Check for Genesis APIs",
    description="An API to check if the API is working"
)

# Tab to set up a new game session; all three inputs are plain text
# (game_id, user_id, user_input).
initialize_game_gr = gradio.Interface(
    fn=initialize_game,
    inputs=["text", "text", "text"],
    outputs="text",
    title="Initialize Game",
    description="An API to initialize the game. This is executed ONLY when a user starts a game"
)

# Main gameplay tab: each submission is one conversational turn with the LLM.
play_game_gr = gradio.Interface(
    fn=play_game,
    inputs=["text", "text", "text"],
    outputs="text",
    title="Play Game",
    description="An API for the user to interact with the LLM"
)

# Tab that turns game text into an image-generation prompt ("Leo").
generate_image_prompt_gr = gradio.Interface(
    fn=generate_image_prompt,
    inputs=["text", "text", "text"],
    outputs="text",
    title="Generate prompt for image",
    description="An API for the user to generate a prompt to input to Leo"
)

# Combine all tabs into one app and start the (blocking) Gradio server.
genesis_app = gradio.TabbedInterface([health_check_gr, initialize_game_gr, play_game_gr, generate_image_prompt_gr], ["Developers - Health Check", "Initialize game", "Play Game", "Generate Prompt for Leo"])
genesis_app.launch()
chat.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from time import time, sleep
2
+ import datetime
3
+ import dotenv
4
+ import os
5
+ import openai
6
+ import json
7
+ import pinecone
8
+ from uuid import uuid4
9
+ from helper import open_file, save_file, read_word_document
10
+ import re
11
+ from langchain.memory import VectorStoreRetrieverMemory
12
+
13
## Read the environment variables (runs at import time)
dotenv.load_dotenv('.env')
openai.api_key = os.getenv('OPENAI_API_KEY')
embedding_model = os.getenv('EMBEDDING_ENGINE')
# Number of relevant past messages fetched from Pinecone per turn.
convo_length = int(os.getenv('CONVO_LENGTH_TO_FETCH'))
llm_model = os.getenv('LLM_MODEL')
debug=False
if os.getenv('DEBUG') == 'True':
    debug=True

# Pinecone connection. NOTE(review): assumes the index already exists —
# it is created by gameload.py, not here.
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pinecone_env = os.getenv('PINECONE_REGION')
pinecone_index = os.getenv('PINECONE_INDEX')
pinecone.init(
    api_key=pinecone_api_key,
    environment=pinecone_env
)
vector_db = pinecone.Index(pinecone_index)
# Locations of the game documents and the game-id index file.
file_path = os.getenv('GAME_DOCS_FOLDER')
file_name = os.getenv('GAME_DOCS_FILE')
game_index = os.getenv('GAME_ID_INDEX')
34
+
35
+ def timestamp_to_datetime(unix_time):
36
+ return datetime.datetime.fromtimestamp(unix_time).strftime("%A, %B %d, %Y at %I:%M%p %Z")
37
+
38
+
39
def perform_embedding(content):
    """Strip non-ASCII characters from *content* and embed it via the OpenAI API.

    Returns the embedding vector (a list of floats).
    """
    cleaned = content.encode(encoding='ASCII', errors='ignore').decode()
    response = openai.Embedding.create(model=embedding_model, input=cleaned)
    return response['data'][0]['embedding']
44
+
45
def load_conversation(results):
    """Flatten Pinecone query matches into one chronologically ordered text block.

    Each match's metadata must carry 'timestring' and 'text'; messages are
    sorted ascending by timestring and joined with newlines.
    """
    entries = [
        {'time1': match['metadata']['timestring'], 'text': match['metadata']['text']}
        for match in results['matches']
    ]
    entries.sort(key=lambda entry: entry['time1'])
    return '\n'.join(entry['text'] for entry in entries).strip()
53
+
54
+
55
def call_gpt(prompt):
    """Send *prompt* as a single user message to the chat model and return the
    raw OpenAI response (its text whitespace-normalized in place).

    Retries up to 5 times on any exception, sleeping 1s between attempts.
    NOTE(review): after the final retry it returns an error *string* rather
    than a response object; callers access `.choices` on the result, so that
    path raises AttributeError downstream — confirm intended behavior.
    """
    max_retry = 5
    retry = 0
    # Strip non-ASCII characters defensively before sending.
    prompt = prompt.encode(encoding='ASCII',errors='ignore').decode()
    while True:
        try:
            response = openai.ChatCompletion.create(
                model=llm_model,
                temperature=0.9,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            text = response.choices[0].message.content
            # Collapse blank lines and runs of tabs/spaces.
            text = re.sub('[\r\n]+', '\n', text)
            text = re.sub('[\t ]+', ' ', text)
            # Log every prompt/response pair to gpt3_logs/ for offline debugging.
            filename = '%s_gpt3.txt' % time()
            if not os.path.exists('gpt3_logs'):
                os.makedirs('gpt3_logs')
            save_file('gpt3_logs/%s' % filename, prompt + '\n\n==========\n\n' + text)
            # Write the normalized text back into the response before returning.
            response.choices[0].message.content = text
            return response
        except Exception as oops:
            retry += 1
            if retry >= max_retry:
                return "GPT3 error: %s" % oops
            print('Error communicating with OpenAI:', oops)
            sleep(1)
84
+
85
+
86
def start_game(game_id, user_id, user_input):
    """Run one conversational turn.

    Stores the user's message, retrieves the most relevant prior messages for
    this (user_id, game_id) pair from Pinecone, asks the LLM for a reply,
    stores the reply, and returns the reply text.
    """
    payload = list()

    # Get user input, vectorize it, and queue it for upsert into Pinecone.
    timestamp = time()
    timestring = timestamp_to_datetime(timestamp)
    unique_id = str(uuid4())
    vector = perform_embedding(user_input)
    metadata = {'speaker': 'USER', 'user_id': user_id, 'game_id': game_id, 'timestring': timestring, 'text': user_input}
    payload.append((unique_id, vector, metadata))

    # Search for relevant past messages, scoped to this user AND game.
    results = vector_db.query(vector=vector, top_k=convo_length, include_metadata=True,
        filter={
            "$and": [{"user_id": {"$eq": user_id}}, {"game_id": {"$eq": game_id}}]
        }
    )
    conversation = load_conversation(results)

    # Populate the response prompt template with the per-user game prompt,
    # retrieved context, and the new user message.
    prompt_text = open_file(f"prompt_{game_id}_{user_id}.txt")
    prompt = open_file('prompt_response.txt').replace('<<PROMPT_VALUE>>', prompt_text).replace('<<CONVERSATION>>', conversation).replace('<<USER_MSG>>', user_input).replace('<<USER_VAL>>', user_id)

    # Generate the response and vectorize it for storage.
    llm_output_msg = call_gpt(prompt)
    llm_output = llm_output_msg.choices[0].message.content
    timestamp_op = time()
    # BUG FIX: previously formatted the *input* timestamp instead of the
    # output timestamp captured just above.
    timestring_op = timestamp_to_datetime(timestamp_op)
    vector_op = perform_embedding(llm_output)
    # BUG FIX: uuid4 was referenced without calling it, so every bot message
    # shared one bogus id (the function repr) and overwrote earlier entries
    # on upsert.
    unique_id_op = str(uuid4())
    # BUG FIX: bot metadata now records the bot-message timestring, not the
    # user-message one.
    metadata_op = {'speaker': 'BOT', 'user_id': user_id, 'game_id': game_id, 'timestring': timestring_op, 'text': llm_output}
    payload.append((unique_id_op, vector_op, metadata_op))

    # Upsert both the user message and the bot reply in one call.
    vector_db.upsert(payload)

    return llm_output
125
+
126
def get_game_details(game_id):
    """Look up a game's record in the game-index JSON file.

    Returns the matching entry dict from "game_details", or the string
    "Not Found" when no entry has the requested game_id.
    """
    index_json = json.loads(open_file(f"{file_path}/{game_index}"))
    for entry in index_json["game_details"]:
        if entry["game_id"] == game_id:
            return entry
    return "Not Found"
133
+
134
def populate_prompt(game_id, splits):
    """Reassemble the full game prompt from its stored document splits.

    Splits are stored in the vector DB under ids "<game_id>-0" through
    "<game_id>-<splits-1>"; their text metadata is joined with spaces.
    """
    split_ids = [f"{game_id}-{j}" for j in range(int(splits))]
    fetched = vector_db.fetch(ids=split_ids)
    pieces = [fetched['vectors'][split_id]["metadata"]["text"] for split_id in split_ids]
    return ' '.join(pieces).strip()
146
+
147
+
148
def initialize_game(game_id, user_id, user_input):
    """Build the opening prompt for a game, run it through the LLM, persist the
    result as this user's prompt file, and return the LLM's text."""
    details = get_game_details(game_id)
    assembled = populate_prompt(game_id, details["splits"])
    if debug:
        print(assembled[:1000])
    assembled = assembled.replace("<<USER_INPUT_MSG>>", user_input)
    if debug:
        print(assembled[:1000])

    llm_prompt_op = call_gpt(assembled)
    # print(llm_prompt_op.choices[0]["message"]["content"])
    prompt_content = llm_prompt_op.choices[0]["message"]["content"]
    # Persisted file is later read back by start_game for this user/game.
    save_file(f"prompt_{game_id}_{user_id}.txt", prompt_content)
    return prompt_content
162
+
163
+
164
def generate_image_prompt(game_id, user_id, user_input):
    """Turn the user's latest game text into an image-generation prompt for Leo.

    Strips the boilerplate "manual" sentence when present, fills the Leo
    prompt template, calls the LLM, and returns its text.
    """
    if 'You have a manual' in user_input:
        user_input = user_input.replace('You have a manual of this newly created simulation in your mind. Now what is the first thing you will do in this world?', '')
    payload = list()
    file_data = open_file(f"{file_path}/image_prompt_leo.txt").replace("<<PROMPT_FOR_IMG>>", user_input)
    leo_input_msg = call_gpt(file_data)
    leo_input = leo_input_msg.choices[0].message.content
    timestamp_op = time()
    timestring_op = timestamp_to_datetime(timestamp_op)
    vector_op = perform_embedding(leo_input)
    # BUG FIX: uuid4 was referenced without calling it, producing the
    # function's repr instead of a unique id.
    unique_id_op = str(uuid4())
    metadata_op = {'speaker': 'BOT4LEO', 'user_id': user_id, 'game_id': game_id, 'timestring': timestring_op, 'text': leo_input}
    payload.append((unique_id_op, vector_op, metadata_op))
    # TODO(review): payload is assembled (at embedding-API cost) but never
    # upserted to vector_db — confirm whether the Leo prompt should be
    # persisted like other messages or the embedding call removed.

    return leo_input
179
+
180
# Script entry point: this module is normally imported by app.py; running it
# directly just confirms the module loads.
if __name__ == '__main__':
    print("main")
gameload.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ # import dotenv
3
+ import openai
4
+ import pinecone
5
+ from langchain.document_loaders import Docx2txtLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ import hashlib
8
+ from time import sleep
9
+ from helper import append_file
10
+ import json
11
+
12
+ ## Read the environment variables
13
+ # dotenv.load_dotenv('.env')
14
+ openai.api_key = os.getenv('OPENAI_API_KEY')
15
+ embedding_model = os.getenv('EMBEDDING_ENGINE')
16
+ debug_mode = os.getenv('DEBUG')
17
+ file_path = os.getenv('GAME_DOCS_FOLDER')
18
+ file_name = os.getenv('GAME_DOCS_FILE')
19
+ game_index = os.getenv('GAME_ID_INDEX')
20
+
21
+ pinecone_api_key = os.getenv('PINECONE_API_KEY')
22
+ pinecone_env = os.getenv('PINECONE_REGION')
23
+ pinecone_index = os.getenv('PINECONE_INDEX')
24
+ pinecone.init(
25
+ api_key=pinecone_api_key,
26
+ environment=pinecone_env
27
+ )
28
+ # check if index_name' index already exists (only create index if not)
29
+ if pinecone_index not in pinecone.list_indexes():
30
+ pinecone.create_index(pinecone_index, dimension=1536, metric="cosine", pods=1, pod_type="p1.x1")
31
+ sleep(3)
32
+
33
+ vector_db = pinecone.Index(pinecone_index)
34
+
35
+
36
def perform_embedding(doclist):
    """Embed each document split and return Pinecone upsert tuples.

    Derives a stable 12-hex-char game id from the document file name, records
    the (game_id, game_file) pair in the game-index file, then embeds every
    split. Each returned item is (unique_id, vector, metadata).
    """
    # Stable game id: first 12 hex chars of the MD5 of the file name.
    digest = hashlib.md5()
    digest.update(file_name.encode('utf-8'))
    game_id = digest.hexdigest()[:12]
    index_entry = {"game_id": game_id, "game_file": file_name}
    append_file(f"{file_path}/{game_index}", json.dumps(index_entry))

    payload = list()
    for split_count, doc in enumerate(doclist):
        cleaned = doc.page_content.encode(encoding='ASCII', errors='ignore').decode()
        response = openai.Embedding.create(model=embedding_model, input=cleaned)
        payload.append((
            f"{game_id}-{split_count}",
            response['data'][0]['embedding'],
            {'game_id': game_id, 'split_count': split_count, 'text': cleaned},
        ))

    return payload
55
+
56
+
57
def load_split_document():
    """Load the game Word document and split it into ~1000-character chunks."""
    loader = Docx2txtLoader(file_path + "/" + file_name)
    word_doc_data = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(word_doc_data)
    if debug_mode == 'True':
        print("Total count of splits created: " + str(len(chunks)))
    return chunks
65
+
66
def upload_game_docs():
    """Load the game document, embed every split, and upsert all vectors
    into the Pinecone index in one call."""
    docs = load_split_document()
    payload = perform_embedding(docs)
    vector_db.upsert(payload)

# Script entry point: run once per game document to load it into the index.
if __name__ == '__main__':
    upload_game_docs()
helper.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from docx import Document
3
+
4
def open_file(filepath):
    """Read and return the entire contents of a UTF-8 text file."""
    with open(filepath, 'r', encoding='utf-8') as handle:
        return handle.read()
7
+
8
def save_file(filepath, content):
    """Write *content* to a UTF-8 text file, replacing any existing contents."""
    with open(filepath, 'w', encoding='utf-8') as handle:
        handle.write(content)
11
+
12
def append_file(filepath, content):
    """Append *content* to a UTF-8 text file, creating it if missing.

    BUG FIX: the file was opened in 'w' (truncate) mode, so every call
    overwrote the whole file instead of appending — contradicting the
    function's name and losing earlier game-index entries written by
    gameload.py.
    """
    with open(filepath, 'a', encoding='utf-8') as outfile:
        outfile.write(content)
15
+
16
def read_word_document(filepath):
    """Return the text of every paragraph in a .docx file.

    On any read/parse failure, prints the error and returns an empty list
    instead of raising.
    """
    try:
        document = Document(filepath)
        return [paragraph.text for paragraph in document.paragraphs]
    except Exception as e:
        print(f"Error reading the Word document: {e}")
        return []
prompt_response.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<PROMPT_VALUE>>
2
+
3
+
4
+ The following are the most relevant messages in the conversation:
5
+ <<CONVERSATION>>
6
+
7
+ <<USER_VAL>>:
8
+ <<USER_MSG>>
9
+
10
+
11
+ You will now provide a response, followed by a question on behalf of the person who answers:
12
+
13
+ GENESIS:
requirements.txt ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ altair==5.0.1
5
+ anyio==3.7.0
6
+ async-timeout==4.0.2
7
+ attrs==23.1.0
8
+ certifi==2023.5.7
9
+ charset-normalizer==3.1.0
10
+ click==8.1.3
11
+ colorama==0.4.6
12
+ contourpy==1.0.7
13
+ cycler==0.11.0
14
+ elevenlabs==0.2.18
15
+ exceptiongroup==1.1.1
16
+ fastapi==0.95.2
17
+ ffmpy==0.3.0
18
+ filelock==3.12.0
19
+ fonttools==4.39.4
20
+ frozenlist==1.3.3
21
+ fsspec==2023.5.0
22
+ gradio==3.32.0
23
+ gradio-client==0.2.5
24
+ h11==0.14.0
25
+ httpcore==0.17.2
26
+ httpx==0.24.1
27
+ huggingface-hub==0.14.1
28
+ idna==3.4
29
+ importlib-resources==5.12.0
30
+ Jinja2==3.1.2
31
+ jsonschema==4.17.3
32
+ kiwisolver==1.4.4
33
+ linkify-it-py==2.0.2
34
+ markdown-it-py==2.2.0
35
+ MarkupSafe==2.1.2
36
+ matplotlib==3.7.1
37
+ mdit-py-plugins==0.3.3
38
+ mdurl==0.1.2
39
+ multidict==6.0.4
40
+ numpy==1.24.3
41
+ orjson==3.8.14
42
+ packaging==23.1
43
+ pandas==2.0.2
44
+ Pillow==9.5.0
45
+ pkgutil-resolve-name==1.3.10
46
+ pydantic==1.10.8
47
+ pydub==0.25.1
48
+ Pygments==2.15.1
49
+ pyparsing==3.0.9
50
+ pyrsistent==0.19.3
51
+ python-dateutil==2.8.2
52
+ python-docx==0.8.11
53
+ python-multipart==0.0.6
54
+ pytz==2023.3
55
+ PyYAML==6.0
56
+ requests==2.31.0
57
+ semantic-version==2.10.0
58
+ six==1.16.0
59
+ sniffio==1.3.0
60
+ starlette==0.27.0
61
+ toolz==0.12.0
62
+ tqdm==4.65.0
63
+ typing-extensions==4.6.2
64
+ tzdata==2023.3
65
+ uc-micro-py==1.0.2
66
+ urllib3==2.0.2
67
+ uvicorn==0.22.0
68
+ websockets==11.0.3
69
+ yarl==1.9.2
70
+ zipp==3.15.0
71
+ flask
72
+ python-dotenv
73
+ langchain
74
+ openai
75
+ pinecone-client
76
+ tiktoken
77
+ docx2txt