paulpierre commited on
Commit
0245be8
·
0 Parent(s):

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env-example +48 -0
  2. .gitignore +17 -0
  3. LICENSE +21 -0
  4. Makefile +283 -0
  5. README.md +679 -0
  6. RESULTS.md +166 -0
  7. app/api/.dockerignore +4 -0
  8. app/api/API.md +666 -0
  9. app/api/Dockerfile +15 -0
  10. app/api/Makefile +159 -0
  11. app/api/config.py +176 -0
  12. app/api/data/training_data/org-about_the_company.md +36 -0
  13. app/api/data/training_data/org-board_of_directors.md +28 -0
  14. app/api/data/training_data/org-company_story.md +31 -0
  15. app/api/data/training_data/org-corporate_philosophy.md +31 -0
  16. app/api/data/training_data/org-customer_support.md +28 -0
  17. app/api/data/training_data/org-earnings_fy2023.md +58 -0
  18. app/api/data/training_data/org-management_team.md +28 -0
  19. app/api/data/training_data/project-frogonil.md +48 -0
  20. app/api/data/training_data/project-kekzal.md +50 -0
  21. app/api/data/training_data/project-memegen.md +36 -0
  22. app/api/data/training_data/project-memetrex.md +48 -0
  23. app/api/data/training_data/project-neurokek.md +56 -0
  24. app/api/data/training_data/project-pepetamine.md +48 -0
  25. app/api/data/training_data/project-pepetrak.md +36 -0
  26. app/api/helpers.py +658 -0
  27. app/api/llm.py +465 -0
  28. app/api/main.py +567 -0
  29. app/api/models.py +660 -0
  30. app/api/ngrok.py +117 -0
  31. app/api/requirements.txt +12 -0
  32. app/api/seed.py +166 -0
  33. app/api/static/img/rasagpt-icon-200x200.png +0 -0
  34. app/api/static/img/rasagpt-logo-1.png +0 -0
  35. app/api/static/img/rasagpt-logo-2.png +0 -0
  36. app/api/util.py +80 -0
  37. app/api/wait-for-it.sh +0 -0
  38. app/db/Dockerfile +5 -0
  39. app/db/create_db.sh +13 -0
  40. app/rasa-credentials/.dockerignore +4 -0
  41. app/rasa-credentials/Dockerfile +15 -0
  42. app/rasa-credentials/main.py +182 -0
  43. app/rasa-credentials/requirements.txt +8 -0
  44. app/rasa/.dockerignore +4 -0
  45. app/rasa/actions/Dockerfile +9 -0
  46. app/rasa/actions/__init__.py +0 -0
  47. app/rasa/actions/actions.py +73 -0
  48. app/rasa/config.yml +5 -0
  49. app/rasa/credentials.yml +7 -0
  50. app/rasa/custom_telegram.py +16 -0
.env-example ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ENV=local
2
+
3
+ FILE_UPLOAD_PATH=data
4
+ LLM_DEFAULT_TEMPERATURE=0
5
+ LLM_CHUNK_SIZE=1000
6
+ LLM_CHUNK_OVERLAP=200
7
+ LLM_DISTANCE_THRESHOLD=0.2
8
+ LLM_MAX_OUTPUT_TOKENS=256
9
+ LLM_MIN_NODE_LIMIT=3
10
+ LLM_DEFAULT_DISTANCE_STRATEGY=EUCLIDEAN
11
+
12
+ POSTGRES_USER=postgres
13
+ POSTGRES_PASSWORD=postgres
14
+ POSTGRES_DB=postgres
15
+ PGVECTOR_ADD_INDEX=true
16
+
17
+ DB_HOST=db
18
+ DB_PORT=5432
19
+ DB_USER=api
20
+ DB_NAME=api
21
+ DB_PASSWORD=<YOUR DATABASE PASSWORD>
22
+
23
+ NGROK_HOST=ngrok
24
+ NGROK_PORT=4040
25
+ NGROK_AUTHTOKEN=<YOUR NGROK AUTH TOKEN>
26
+ NGROK_API_KEY=<YOUR NGROK API KEY>
27
+ NGROK_INTERNAL_WEBHOOK_HOST=api
28
+ NGROK_INTERNAL_WEBHOOK_PORT=8888
29
+ NGROK_DEBUG=true
30
+ NGROK_CONFIG=/etc/ngrok.yml
31
+
32
+ RASA_WEBHOOK_HOST=rasa-core
33
+ RASA_WEBHOOK_PORT=5005
34
+
35
+ CREDENTIALS_PATH=/app/rasa/credentials.yml
36
+
37
+ TELEGRAM_ACCESS_TOKEN=<YOUR TELEGRAM ACCESS TOKEN>
38
+ TELEGRAM_BOTNAME=rasagpt
39
+
40
+ API_PORT=8888
41
+ API_HOST=api
42
+
43
+ PGADMIN_PORT=5050
44
+ PGADMIN_DEFAULT_PASSWORD=pgadmin
45
+ PGADMIN_DEFAULT_EMAIL=your@emailaddress.com
46
+
47
+ MODEL_NAME=gpt-3.5-turbo
48
+ OPENAI_API_KEY=<YOUR OPEN AI KEY>
.gitignore ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ .trunk
3
+ .vscode
4
+ mnt
5
+ venv/
6
+ .env
7
+ .env-dev
8
+ .env
9
+ .env-staging
10
+ .env-stage
11
+ .env-prod
12
+ .env-production
13
+ __pycache__/
14
+ app/rasa/models/*
15
+ app/rasa/.rasa
16
+ app/rasa/.config
17
+ app/rasa/.keras
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Paul Pierre
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Makefile ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: default banner help install build run stop restart logs ngrok pgadmin api api-stop db db-stop db-purge purge models shell-api shell-db shell-rasa shell-actions rasa-train rasa-start rasa-stop
2
+
3
+ default: help
4
+
5
+ help:
6
+ @make banner
7
+ @echo "+------------------+"
8
+ @echo "| 🏠 CORE COMMANDS |"
9
+ @echo "+------------------+"
10
+ @echo "make install - Install and run RasaGPT"
11
+ @echo "make build - Build docker images"
12
+ @echo "make run - Run RasaGPT"
13
+ @echo "make stop - Stop RasaGPT"
14
+ @echo "make restart - Restart RasaGPT\n"
15
+ @echo "+---------------------+"
16
+ @echo "| 🌍 ADMIN INTERFACES |"
17
+ @echo "+---------------------+"
18
+ @echo "make logs - View logs via Dozzle"
19
+ @echo "make ngrok - View ngrok dashboard"
20
+ @echo "make pgadmin - View pgAdmin dashboard\n"
21
+ @echo "+-----------------------+"
22
+ @echo "| 👷 DEBUGGING COMMANDS |"
23
+ @echo "+-----------------------+"
24
+ @echo "make api - Run only API server"
25
+ @echo "make models - Build Rasa models"
26
+ @echo "make purge - Remove all docker images"
27
+ @echo "make db-purge - Delete all data in database"
28
+ @echo "make shell-api - Open shell in API container"
29
+ @echo "make shell-db - Open shell in database container"
30
+ @echo "make shell-rasa - Open shell in Rasa container"
31
+ @echo "make shell-actions - Open shell in Rasa actions container\n"
32
+
33
+
34
+ banner:
35
+ @echo "\n\n-------------------------------------"
36
+ @echo "▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀"
37
+ @echo "▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░"
38
+ @echo "▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░"
39
+ @echo "+-----------------------------------+"
40
+ @echo "| http://RasaGPT.dev by @paulpierre |"
41
+ @echo "+-----------------------------------+\n\n"
42
+
43
+
44
+
45
+ # ==========================
46
+ # 👷 INITIALIZATION COMMANDS
47
+ # ==========================
48
+
49
+ # ---------------------------------------
50
+ # Run this first to setup the environment
51
+ # ---------------------------------------
52
+ install:
53
+ @make banner
54
+ @make stop
55
+ @make env-var
56
+ @make rasa-train
57
+ @make build
58
+ @make run
59
+ @make models
60
+ @make rasa-restart
61
+ @make seed
62
+ @echo "✅ RasaGPT installed and running"
63
+
64
+ # -----------------------
65
+ # Build the docker images
66
+ # -----------------------
67
+ build:
68
+ @echo "🏗️ Building docker images ..\n"
69
+ @docker-compose -f docker-compose.yml build
70
+
71
+
72
+ # ================
73
+ # 🏠 CORE COMMANDS
74
+ # ================
75
+
76
+ # ---------------------------
77
+ # Startup all docker services
78
+ # ---------------------------
79
+
80
+ run:
81
+ @echo "🚀 Starting docker-compose.yml ..\n"
82
+ @docker-compose -f docker-compose.yml up -d
83
+
84
+ # ---------------------------
85
+ # Stop all running containers
86
+ # ---------------------------
87
+
88
+ stop:
89
+ @echo "🔍 Stopping any running containers .. \n"
90
+ @docker-compose -f docker-compose.yml down
91
+
92
+ # ----------------------
93
+ # Restart all containers
94
+ # ----------------------
95
+ restart:
96
+ @echo "🔁 Restarting docker services ..\n"
97
+ @make stop
98
+ @make run
99
+
100
+ # ----------------------
101
+ # Restart Rasa core only
102
+ # ----------------------
103
+ rasa-restart:
104
+ @echo "🤖 Restarting Rasa so it grabs credentials ..\n"
105
+ @make rasa-stop
106
+ @make rasa-start
107
+
108
+ rasa-stop:
109
+ @echo "🤖 Stopping Rasa ..\n"
110
+ @docker-compose -f docker-compose.yml stop rasa-core
111
+
112
+ rasa-start:
113
+ @echo "🤖 Starting Rasa ..\n"
114
+ @docker-compose -f docker-compose.yml up -d rasa-core
115
+
116
+ rasa-build:
117
+ @echo "🤖 Building Rasa ..\n"
118
+ @docker-compose -f docker-compose.yml build rasa-core
119
+
120
+ # -----------------------
121
+ # Seed database with data
122
+ # -----------------------
123
+ seed:
124
+ @echo "🌱 Seeding database ..\n"
125
+ @docker-compose -f docker-compose.yml exec api /app/api/wait-for-it.sh db:5432 --timeout=60 -- python3 seed.py
126
+
127
+
128
+ # =======================
129
+ # 🌍 WEB ADMIN INTERFACES
130
+ # =======================
131
+
132
+ # -------------------------
133
+ # Reverse HTTP tunnel admin
134
+ # -------------------------
135
+ ngrok:
136
+ @echo "📡 Opening ngrok agent in the browser ..\n"
137
+ @open http://localhost:4040
138
+
139
+ # ------------------------
140
+ # Postgres admin interface
141
+ # ------------------------
142
+ pgadmin:
143
+ @echo "👷‍♂️ Opening PG Admin in the browser ..\n"
144
+ @open http://localhost:5050
145
+
146
+ # ------------------------
147
+ # Container logs interface
148
+ # ------------------------
149
+ logs:
150
+ @echo "🔍 Opening container logs in the browser ..\n"
151
+ @open http://localhost:9999/
152
+
153
+ # =====================
154
+ # 👷 DEBUGGING COMMANDS
155
+ # =====================
156
+
157
+ # ---------------------------
158
+ # Startup just the API server
159
+ # ---------------------------
160
+ api:
161
+ @make db
162
+ @echo "🚀 Starting FastAPI and postgres ..\n"
163
+ @docker-compose -f docker-compose.yml up -d api
164
+
165
+ # ------------------------
166
+ # Startup just Postgres DB
167
+ # ------------------------
168
+ db:
169
+ @echo "🚀 Starting Postgres with pgvector ..\n"
170
+ @docker-compose -f docker-compose.yml up -d db
171
+
172
+
173
+ db-stop:
174
+ @echo " Stopping the database ..\n"
175
+ @docker-compose -f docker-compose.yml stop db
176
+
177
+
178
+ db-reset:
179
+ @echo " Resetting the database ..\n"
180
+ @make db-purge
181
+ @make api
182
+ @make models
183
+
184
+ # -------------------------------
185
+ # Build the schema in Postgres DB
186
+ # -------------------------------
187
+ models:
188
+ @echo "💽 Building models in Postgres ..\n"
189
+ @docker-compose -f docker-compose.yml exec api /app/api/wait-for-it.sh db:5432 --timeout=60 -- python3 models.py
190
+
191
+ # -------------------------------
192
+ # Delete containers or bad images
193
+ # -------------------------------
194
+ purge:
195
+ @echo "🧹 Purging all containers and images ..\n"
196
+ @make stop
197
+ @docker system prune -a
198
+ @make install
199
+
200
+ # --------------------------------
201
+ # Delete the database mount volume
202
+ # --------------------------------
203
+ db-purge:
204
+ @echo "⛔ Are you sure you want to delete all data in the database? [y/N]\n"
205
+ @read confirmation; \
206
+ if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \
207
+ echo "Deleting generated files .."; \
208
+ make stop; \
209
+ rm -rf ./mnt; \
210
+ echo "Deleted."; \
211
+ else \
212
+ echo "Aborted."; \
213
+ fi
214
+
215
+ # --------------------------------------
216
+ # Open a bash shell in the API container
217
+ # --------------------------------------
218
+ shell-api:
219
+ @echo "💻🐢 Opening a bash shell in the RasaGPT API container ..\n"
220
+ @if docker ps | grep api > /dev/null; then \
221
+ docker exec -it $$(docker ps | grep api | tr -d '\n' | awk '{print $$1}') /bin/bash; \
222
+ else \
223
+ echo "Container api is not running"; \
224
+ fi
225
+
226
+ # ---------------------------------------
227
+ # Open a bash shell in the Rasa container
228
+ # ---------------------------------------
229
+ shell-rasa:
230
+ @echo "💻🐢 Opening a bash shell in the rasa-core container ..\n"
231
+ @if docker ps | grep rasa-core > /dev/null; then \
232
+ docker exec -it $$(docker ps | grep rasa-core | tr -d '\n' | awk '{print $$1}') /bin/bash; \
233
+ else \
234
+ echo "Container rasa-core is not running"; \
235
+ fi
236
+
237
+ # -----------------------------------------------
238
+ # Open a bash shell in the Rasa actions container
239
+ # -----------------------------------------------
240
+ shell-actions:
241
+ @echo "💻🐢 Opening a bash shell in the rasa-actions container ..\n"
242
+ @if docker ps | grep rasa-actions > /dev/null; then \
243
+ docker exec -it $$(docker ps | grep rasa-actions | tr -d '\n' | awk '{print $$1}') /bin/bash; \
244
+ else \
245
+ echo "Container rasa-actions is not running"; \
246
+ fi
247
+
248
+ # -------------------------------------------
249
+ # Open a bash shell in the Postgres container
250
+ # -------------------------------------------
251
+ shell-db:
252
+ @echo "💻🐢 Opening a bash shell in the Postgres container ..\n"
253
+ @if docker ps | grep db > /dev/null; then \
254
+ docker exec -it $$(docker ps | grep db | tr -d '\n' | awk '{print $$1}') /bin/bash; \
255
+ else \
256
+ echo "Container db is not running"; \
257
+ fi
258
+
259
+ # ==================
260
+ # 💁 HELPER COMMANDS
261
+ # ==================
262
+
263
+ # -------------
264
+ # Check envvars
265
+ # -------------
266
+ env-var:
267
+ @echo "🔍 Checking if envvars are set ..\n";
268
+ @if ! test -e "./.env"; then \
269
+ echo "❌ .env file not found. Please copy .env-example to .env and update values"; \
270
+ exit 1; \
271
+ else \
272
+ echo "✅ found .env\n"; \
273
+ fi
274
+
275
+ # -----------------
276
+ # Train Rasa models
277
+ # -----------------
278
+ rasa-train:
279
+ @echo "💽 Generating Rasa models ..\n"
280
+ @make rasa-start
281
+ @docker-compose -f docker-compose.yml exec rasa-core rasa train
282
+ @make rasa-stop
283
+ @echo "✅ Done\n"
README.md ADDED
@@ -0,0 +1,679 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ![RasaGPT Logo](https://github.com/paulpierre/RasaGPT/blob/main/github/title.png?raw=true)
3
+
4
+ <br/><br/>
5
+
6
+
7
+ # 🏠 Overview
8
+
9
+ 💬 RasaGPT is the first headless LLM chatbot platform built on top of [Rasa](https://github.com/RasaHQ/rasa) and [Langchain](https://github.com/hwchase17/langchain). It is boilerplate and a reference implementation of Rasa and Telegram utilizing an LLM library like Langchain for indexing, retrieval and context injection.
10
+
11
+ <br/><br/>
12
+
13
+ # 💁‍♀️ Why RasaGPT?
14
+
15
+ RasaGPT works out of the box. A lot of the implementation headaches were sorted out so you don’t have to, including:
16
+
17
+ - Creating your own proprietary bot end-point using FastAPI, document upload and “training” pipeline included
18
+ - How to integrate Langchain/LlamaIndex and Rasa
19
+ - Library conflicts with LLM libraries and passing metadata
20
+ - Dockerized [support on MacOS](https://github.com/khalo-sa/rasa-apple-silicon) for running Rasa
21
+ - Reverse proxy with chatbots [via ngrok](https://ngrok.com/docs/ngrok-agent/)
22
+ - Implementing pgvector with your own custom schema instead of using Langchain’s highly opinionated [PGVector class](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/pgvector.html)
23
+ - Adding multi-tenancy (Rasa [doesn't natively support this](https://forum.rasa.com/t/multi-tenancy-in-rasa-core/2382)), sessions and metadata between Rasa and your own backend / application
24
+
25
+ The backstory is familiar. A friend came to me with a problem. I scoured Google and Github for a decent reference implementation of LLM’s integrated with Rasa but came up empty-handed. I figured this to be a great opportunity to satiate my curiosity and 2 days later I had a proof of concept, and a week later this is what I came up with.
26
+
27
+ <br/>
28
+
29
+ > ⚠️ **Caveat emptor:**
30
+ This is far from production code and rife with prompt injection and general security vulnerabilities. I just hope someone finds this useful 😊
31
+
32
+
33
+ <br/><br/>
34
+
35
+ # **✨** Quick start
36
+
37
+ Getting started is easy, just make sure you meet the dependencies below.
38
+
39
+ ```bash
40
+ # Get the code
41
+ git clone https://github.com/paulpierre/RasaGPT.git
42
+ cd RasaGPT
43
+
44
+ ## Setup the .env file
45
+ cp .env-example .env
46
+
47
+ # Edit your .env file and add all the necessary credentials
48
+ make install
49
+
50
+ # Type "make" to see more options
51
+ make
52
+ ```
53
+
54
+ <br/><br/>
55
+
56
+
57
+ # 🔥 Features
58
+
59
+ ## Full Application and API
60
+
61
+ - LLM “learns” on an arbitrary corpus of data using Langchain
62
+ - Upload documents and “train” all via [FastAPI](https://fastapi.tiangolo.com/)
63
+ - Document versioning and automatic “re-training” implemented on upload
64
+ - Customize your own async end-points and database models via [FastAPI](https://fastapi.tiangolo.com/) and [SQLModel](https://sqlmodel.tiangolo.com/)
65
+ - Bot determines whether human handoff is necessary
66
+ - Bot generates tags based on user questions and response automatically
67
+ - Full API documentation via [Swagger](https://github.com/swagger-api/swagger-ui) and [Redoc](https://redocly.github.io/redoc/) included
68
+ - [Ngrok](https://ngrok.com/docs) end-points are automatically generated for you on startup so your bot can always be accessed via `https://t.me/yourbotname`
69
+ - Embedding similarity search built into Postgres via [pgvector](https://github.com/pgvector/pgvector) and Postgres functions
70
+ - [Dummy data included](https://github.com/paulpierre/RasaGPT/tree/main/app/api/data/training_data) for you to test and experiment
71
+ - Unlimited use cases from help desk, customer support, quiz, e-learning, dungeon and dragons, and more
72
+ <br/><br/>
73
+ ## Rasa integration
74
+
75
+ - Built on top of [Rasa](https://rasa.com/docs/rasa/), the open source gold-standard for chat platforms
76
+ - Supports MacOS M1/M2 via Docker (canonical Rasa image [lacks MacOS arch. support](https://github.com/khalo-sa/rasa-apple-silicon))
77
+ - Supports Telegram, easily integrate Slack, Whatsapp, Line, SMS, etc.
78
+ - Setup complex dialog pipelines using NLU models from Huggingface like BERT or libraries/frameworks like Keras, Tensorflow with OpenAI GPT as fallback
79
+ <br/><br/>
80
+ ## Flexibility
81
+
82
+ - Extend agentic, memory, etc. capabilities with Langchain
83
+ - Schema supports multi-tenancy, sessions, data storage
84
+ - Customize agent personalities
85
+ - Saves all chat history and creates embeddings from all interactions, future-proofing your retrieval strategy
86
+ - Automatically generate embeddings from knowledge base corpus and client feedback
87
+
88
+ <br/><br/>
89
+
90
+ # 🧑‍💻 Installing
91
+
92
+ ## Requirements
93
+
94
+ - Python 3.9
95
+ - Docker & Docker compose ([Docker desktop MacOS](https://www.docker.com/products/docker-desktop/))
96
+ - Open AI [API key](https://platform.openai.com/account/api-keys)
97
+ - Telegram [bot credentials](https://core.telegram.org/bots#how-do-i-create-a-bot)
98
+ - Ngrok [auth token](https://dashboard.ngrok.com/tunnels/authtokens)
99
+ - Make ([MacOS](https://formulae.brew.sh/formula/make)/[Windows](https://stackoverflow.com/questions/32127524/how-to-install-and-use-make-in-windows))
100
+ - SQLModel
101
+
102
+ <br/>
103
+
104
+ ## Setup
105
+
106
+ ```bash
107
+ git clone https://github.com/paulpierre/RasaGPT.git
108
+ cd RasaGPT
109
+ cp .env-example .env
110
+
111
+ # Edit your .env file and all the credentials
112
+
113
+ ```
114
+
115
+ <br/>
116
+
117
+
118
+ At any point feel free to just type in `make` and it will display the list of options, mostly useful for debugging:
119
+
120
+ <br/>
121
+
122
+
123
+ ![Makefile main](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-1.png?raw=true)
124
+
125
+ <br/>
126
+
127
+ ## Docker-compose
128
+
129
+ The easiest way to get started is using the `Makefile` in the root directory. It will install and run all the services for RasaGPT in the correct order.
130
+
131
+ ```bash
132
+ make install
133
+
134
+ # This will automatically install and run RasaGPT
135
+ # After installation, to run again you can simply run
136
+
137
+ make run
138
+ ```
139
+ <br/>
140
+
141
+ ## Local Python Environment
142
+
143
+ This is useful if you wish to focus on developing on top of the API, a separate `Makefile` was made for this. This will create a local virtual environment for you.
144
+
145
+ ```bash
146
+ # Assuming you are already in the RasaGPT directory
147
+ cd app/api
148
+ make install
149
+
150
+ # This will automatically install and run RasaGPT
151
+ # After installation, to run again you can simply run
152
+
153
+ make run
154
+ ```
155
+ <br/>
156
+
157
+ Similarly, enter `make` to see a full list of commands
158
+
159
+ ![Makefile API](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-2.png?raw=true)
160
+
161
+ <br/>
162
+
163
+ ## Installation process
164
+
165
+ Installation is automated and should look like this:
166
+
167
+ ![Installation](https://github.com/paulpierre/RasaGPT/blob/main/github/cli_install.png?raw=true)
168
+
169
+ 👉 Full installation log: [https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd](https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd)
170
+
171
+ <br/>
172
+
173
+ The installation process for Docker takes the following steps at a high level
174
+
175
+ 1. Check to make sure you have `.env` available
176
+ 2. Database is initialized with [`pgvector`](https://github.com/pgvector/pgvector)
177
+ 3. Database models create the database schema
178
+ 4. Trains the Rasa model so it is ready to run
179
+ 5. Sets up ngrok with Rasa so Telegram has a webhook back to your API server
180
+ 6. Sets up the Rasa actions server so Rasa can talk to the RasaGPT API
181
+ 7. Database is populated with dummy data via `seed.py`
182
+
183
+ <br/><br/>
184
+
185
+ # ☑️ Next steps
186
+ <br/>
187
+
188
+ ## 💬 Start chatting
189
+
190
+ You can start chatting with your bot by visiting 👉 [https://t.me/yourbotsname](https://t.me/yourbotsname)
191
+
192
+ ![Telegram](https://github.com/paulpierre/RasaGPT/blob/main/github/telegram.png?raw=true)
193
+
194
+ <br/><br/>
195
+
196
+ ## 👀 View logs
197
+
198
+ You can view all of the logs by visiting 👉 [http://localhost:9999/](http://localhost:9999/) which will display real-time logs of all the docker containers
199
+
200
+ ![Dozzle](https://github.com/paulpierre/RasaGPT/blob/main/github/container_logs.png?raw=true)
201
+
202
+ <br/><br/>
203
+
204
+ ## 📖 API documentation
205
+
206
+ View the API endpoint docs by visiting 👉 [http://localhost:8888/docs](http://localhost:8888/docs)
207
+
208
+ In this page you can create and update entities, as well as upload documents to the knowledge base.
209
+
210
+ ![Swagger Docs](https://github.com/paulpierre/RasaGPT/blob/main/github/api.png?raw=true)
211
+
212
+ <br/><br/>
213
+
214
+ # ✏️ Examples
215
+
216
+ The bot is just a proof-of-concept and has not been optimized for retrieval. It currently uses 1000 character length chunking for indexing and basic euclidean distance for retrieval and quality is hit or miss.
217
+
218
+ You can view example hits and misses with the bot in the [RESULTS.MD](https://github.com/paulpierre/RasaGPT/blob/main/RESULTS.md) file. Overall I estimate index optimization and LLM configuration changes can increase output quality by more than 70%.
219
+
220
+ <br/>
221
+
222
+ 👉 Click to see the [Q&A results of the demo data in RESULTS.MD](https://github.com/paulpierre/RasaGPT/blob/main/RESULTS.md)
223
+
224
+ <br/><br/>
225
+
226
+ # 💻 API Architecture and Usage
227
+
228
+ The REST API is straight forward, please visit the documentation 👉 http://localhost:8888/docs
229
+
230
+ The entities below have basic CRUD operations and return JSON
231
+
232
+ <br/><br/>
233
+
234
+ ## Organization
235
+
236
+ This can be thought of as a company that is your client in a SaaS / multi-tenant world. By default a list of dummy organizations have been provided
237
+
238
+ ![Screenshot 2023-05-05 at 8.45.28 AM.png](https://github.com/paulpierre/RasaGPT/blob/main/github/orgs.png?raw=true)
239
+
240
+ ```bash
241
+ [
242
+ {
243
+ "id": 1,
244
+ "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452",
245
+ "display_name": "Pepe Corp.",
246
+ "namespace": "pepe",
247
+ "bot_url": null,
248
+ "created_at": "2023-05-05T10:42:45.933976",
249
+ "updated_at": "2023-05-05T10:42:45.933979"
250
+ },
251
+ {
252
+ "id": 2,
253
+ "uuid": "7d574f88-6c0b-4c1f-9368-367956b0e90f",
254
+ "display_name": "Umbrella Corp",
255
+ "namespace": "acme",
256
+ "bot_url": null,
257
+ "created_at": "2023-05-05T10:43:03.555484",
258
+ "updated_at": "2023-05-05T10:43:03.555488"
259
+ },
260
+ {
261
+ "id": 3,
262
+ "uuid": "65105a15-2ef0-4898-ac7a-8eafee0b283d",
263
+ "display_name": "Cyberdine Systems",
264
+ "namespace": "cyberdine",
265
+ "bot_url": null,
266
+ "created_at": "2023-05-05T10:43:04.175424",
267
+ "updated_at": "2023-05-05T10:43:04.175428"
268
+ },
269
+ {
270
+ "id": 4,
271
+ "uuid": "b7fb966d-7845-4581-a537-818da62645b5",
272
+ "display_name": "Bluth Companies",
273
+ "namespace": "bluth",
274
+ "bot_url": null,
275
+ "created_at": "2023-05-05T10:43:04.697801",
276
+ "updated_at": "2023-05-05T10:43:04.697804"
277
+ },
278
+ {
279
+ "id": 5,
280
+ "uuid": "9283d017-b24b-4ecd-bf35-808b45e258cf",
281
+ "display_name": "Evil Corp",
282
+ "namespace": "evil",
283
+ "bot_url": null,
284
+ "created_at": "2023-05-05T10:43:05.102546",
285
+ "updated_at": "2023-05-05T10:43:05.102549"
286
+ }
287
+ ]
288
+ ```
289
+
290
+ <br/>
291
+
292
+ ### Project
293
+
294
+ This can be thought of as a product that belongs to a company. You can view the list of projects that belong to an organization like so:
295
+
296
+ ![org-projects.png](https://github.com/paulpierre/RasaGPT/blob/main/github/org-projects.png?raw=true)
297
+
298
+ ```bash
299
+ [
300
+ {
301
+ "id": 1,
302
+ "documents": [
303
+ {
304
+ "id": 1,
305
+ "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7",
306
+ "display_name": "project-pepetamine.md",
307
+ "node_count": 3
308
+ }
309
+ ],
310
+ "document_count": 1,
311
+ "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87",
312
+ "display_name": "Pepetamine",
313
+ "created_at": "2023-05-05T10:42:46.060930",
314
+ "updated_at": "2023-05-05T10:42:46.060934"
315
+ },
316
+ {
317
+ "id": 2,
318
+ "documents": [
319
+ {
320
+ "id": 2,
321
+ "uuid": "b408595a-3426-4011-9b9b-8e260b244f74",
322
+ "display_name": "project-frogonil.md",
323
+ "node_count": 3
324
+ }
325
+ ],
326
+ "document_count": 1,
327
+ "uuid": "5ba6b812-de37-451d-83a3-8ccccadabd69",
328
+ "display_name": "Frogonil",
329
+ "created_at": "2023-05-05T10:42:48.043936",
330
+ "updated_at": "2023-05-05T10:42:48.043940"
331
+ },
332
+ {
333
+ "id": 3,
334
+ "documents": [
335
+ {
336
+ "id": 3,
337
+ "uuid": "b99d373a-3317-4699-a89e-90897ba00db6",
338
+ "display_name": "project-kekzal.md",
339
+ "node_count": 3
340
+ }
341
+ ],
342
+ "document_count": 1,
343
+ "uuid": "1be4360c-f06e-4494-bf20-e7c73a56f003",
344
+ "display_name": "Kekzal",
345
+ "created_at": "2023-05-05T10:42:49.092675",
346
+ "updated_at": "2023-05-05T10:42:49.092678"
347
+ },
348
+ {
349
+ "id": 4,
350
+ "documents": [
351
+ {
352
+ "id": 4,
353
+ "uuid": "94da307b-5993-4ddd-a852-3d8c12f95f3f",
354
+ "display_name": "project-memetrex.md",
355
+ "node_count": 3
356
+ }
357
+ ],
358
+ "document_count": 1,
359
+ "uuid": "1fd7e772-365c-451b-a7eb-4d529b0927f0",
360
+ "display_name": "Memetrex",
361
+ "created_at": "2023-05-05T10:42:50.184817",
362
+ "updated_at": "2023-05-05T10:42:50.184821"
363
+ },
364
+ {
365
+ "id": 5,
366
+ "documents": [
367
+ {
368
+ "id": 5,
369
+ "uuid": "6deff180-3e3e-4b09-ae5a-6502d031914a",
370
+ "display_name": "project-pepetrak.md",
371
+ "node_count": 4
372
+ }
373
+ ],
374
+ "document_count": 1,
375
+ "uuid": "a389eb58-b504-48b4-9bc3-d3c93d2fbeaa",
376
+ "display_name": "PepeTrak",
377
+ "created_at": "2023-05-05T10:42:51.293352",
378
+ "updated_at": "2023-05-05T10:42:51.293355"
379
+ },
380
+ {
381
+ "id": 6,
382
+ "documents": [
383
+ {
384
+ "id": 6,
385
+ "uuid": "2e3c2155-cafa-4c6b-b7cc-02bb5156715b",
386
+ "display_name": "project-memegen.md",
387
+ "node_count": 5
388
+ }
389
+ ],
390
+ "document_count": 1,
391
+ "uuid": "cec4154f-5d73-41a5-a764-eaf62fc3db2c",
392
+ "display_name": "MemeGen",
393
+ "created_at": "2023-05-05T10:42:52.562037",
394
+ "updated_at": "2023-05-05T10:42:52.562040"
395
+ },
396
+ {
397
+ "id": 7,
398
+ "documents": [
399
+ {
400
+ "id": 7,
401
+ "uuid": "baabcb6f-e14c-4d59-a019-ce29973b9f5c",
402
+ "display_name": "project-neurokek.md",
403
+ "node_count": 5
404
+ }
405
+ ],
406
+ "document_count": 1,
407
+ "uuid": "4a1a0542-e314-4ae7-9961-720c2d092f04",
408
+ "display_name": "Neuro-kek",
409
+ "created_at": "2023-05-05T10:42:53.689537",
410
+ "updated_at": "2023-05-05T10:42:53.689539"
411
+ },
412
+ {
413
+ "id": 8,
414
+ "documents": [
415
+ {
416
+ "id": 8,
417
+ "uuid": "5be007ec-5c89-4bc4-8bfd-448a3659c03c",
418
+ "display_name": "org-about_the_company.md",
419
+ "node_count": 5
420
+ },
421
+ {
422
+ "id": 9,
423
+ "uuid": "c2b3fb39-18c0-4f3e-9c21-749b86942cba",
424
+ "display_name": "org-board_of_directors.md",
425
+ "node_count": 3
426
+ },
427
+ {
428
+ "id": 10,
429
+ "uuid": "41aa81a9-13a9-4527-a439-c2ac0215593f",
430
+ "display_name": "org-company_story.md",
431
+ "node_count": 4
432
+ },
433
+ {
434
+ "id": 11,
435
+ "uuid": "91c59eb8-8c05-4f1f-b09d-fcd9b44b5a20",
436
+ "display_name": "org-corporate_philosophy.md",
437
+ "node_count": 4
438
+ },
439
+ {
440
+ "id": 12,
441
+ "uuid": "631fc3a9-7f5f-4415-8283-78ff582be483",
442
+ "display_name": "org-customer_support.md",
443
+ "node_count": 3
444
+ },
445
+ {
446
+ "id": 13,
447
+ "uuid": "d4c3d3db-6f24-433e-b2aa-52a70a0af976",
448
+ "display_name": "org-earnings_fy2023.md",
449
+ "node_count": 5
450
+ },
451
+ {
452
+ "id": 14,
453
+ "uuid": "08dd478b-414b-46c4-95c0-4d96e2089e90",
454
+ "display_name": "org-management_team.md",
455
+ "node_count": 3
456
+ }
457
+ ],
458
+ "document_count": 7,
459
+ "uuid": "1d2849b4-2715-4dcf-aa68-090a221942ba",
460
+ "display_name": "Pepe Corp. (company)",
461
+ "created_at": "2023-05-05T10:42:55.258902",
462
+ "updated_at": "2023-05-05T10:42:55.258904"
463
+ }
464
+ ]
465
+ ```
466
+
467
+ <br/>
468
+
469
+ ## Document
470
+
471
+ This can be thought of as an artifact related to a product, like an FAQ page or a PDF with financial statement earnings. You can view all the Documents associated with an Organization’s Project like so:
472
+
473
+ ![documents.png](https://github.com/paulpierre/RasaGPT/blob/main/github/documents.png?raw=true)
474
+
475
+ ```bash
476
+ {
477
+ "id": 1,
478
+ "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87",
479
+ "organization": {
480
+ "id": 1,
481
+ "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452",
482
+ "display_name": "Pepe Corp.",
483
+ "bot_url": null,
484
+ "status": 2,
485
+ "created_at": "2023-05-05T10:42:45.933976",
486
+ "updated_at": "2023-05-05T10:42:45.933979",
487
+ "namespace": "pepe"
488
+ },
489
+ "document_count": 1,
490
+ "documents": [
491
+ {
492
+ "id": 1,
493
+ "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7",
494
+ "organization_id": 1,
495
+ "project_id": 1,
496
+ "display_name": "project-pepetamine.md",
497
+ "url": "",
498
+ "data": "# Pepetamine\n\nProduct Name: Pepetamine\n\nPurpose: Increases cognitive focus just like the Limitless movie\n\n**How to Use**\n\nPepetamine is available in the form of rare Pepe-coated tablets. The recommended dosage is one tablet per day, taken orally with a glass of water, preferably while browsing your favorite meme forum for maximum cognitive enhancement. For optimal results, take Pepetamine 30 minutes before engaging in mentally demanding tasks, such as decoding ancient Pepe hieroglyphics or creating your next viral meme masterpiece.\n\n**Side Effects**\n\nSome potential side effects of Pepetamine may include:\n\n1. Uncontrollable laughter and a sudden appreciation for dank memes\n2. An inexplicable desire to collect rare Pepes\n3. Enhanced meme creation skills, potentially leading to internet fame\n4. Temporary green skin pigmentation, resembling the legendary Pepe himself\n5. Spontaneously speaking in \"feels good man\" language\n\nWhile most side effects are generally harmless, consult your memologist if side effects persist or become bothersome.\n\n**Precautions**\n\nBefore taking Pepetamine, please consider the following precautions:\n\n1. Do not use Pepetamine if you have a known allergy to rare Pepes or dank memes.\n2. Pepetamine may not be suitable for individuals with a history of humor deficiency or meme intolerance.\n3. Exercise caution when driving or operating heavy machinery, as Pepetamine may cause sudden fits of laughter or intense meme ideation.\n\n**Interactions**\n\nPepetamine may interact with other substances, including:\n\n1. Normie supplements: Combining Pepetamine with normie supplements may result in meme conflicts and a decreased sense of humor.\n2. 
Caffeine: The combination of Pepetamine and caffeine may cause an overload of energy, resulting in hyperactive meme creation and potential internet overload.\n\nConsult your memologist if you are taking any other medications or substances to ensure compatibility with Pepetamine.\n\n**Overdose**\n\nIn case of an overdose, symptoms may include:\n\n1. Uncontrollable meme creation\n2. Delusions of grandeur as the ultimate meme lord\n3. Time warps into the world of Pepe\n\nIf you suspect an overdose, contact your local meme emergency service or visit the nearest meme treatment facility. Remember, the key to enjoying Pepetamine is to use it responsibly, and always keep in mind the wise words of our legendary Pepe: \"Feels good man.\"",
499
+ "hash": "fdee6da2b5441080dd78e7850d3d2e1403bae71b9e0526b9dcae4c0782d95a78",
500
+ "version": 1,
501
+ "status": 2,
502
+ "created_at": "2023-05-05T10:42:46.755428",
503
+ "updated_at": "2023-05-05T10:42:46.755431"
504
+ }
505
+ ],
506
+ "display_name": "Pepetamine",
507
+ "created_at": "2023-05-05T10:42:46.060930",
508
+ "updated_at": "2023-05-05T10:42:46.060934"
509
+ }
510
+ ```
511
+
512
+ <br/>
513
+
514
+ ## Node
515
+
516
+ Although this is not exposed in the API, a node is a chunk of a document for which embeddings are generated. Nodes are used for retrieval search as well as context injection. A node belongs to a document.
517
+
518
+ <br/>
519
+
520
+ ## User
521
+
522
+ A user represents the person talking to a bot. Users do not necessarily belong to an org or product, but this relationship is captured in ChatSession below.
523
+
524
+ <br/>
525
+
526
+ ## ChatSession
527
+
528
+ Not exposed via API, but this represents a question and answer between the User and a bot. Each of these objects can be flexibly identified by a `session_id` which gets automatically generated. Chat Sessions contain rich metadata that can be used for training and optimization. ChatSessions via the `/chat` endpoint ARE in fact associated with an organization (for multi-tenant security purposes)
529
+
530
+ <br/><br/>
531
+
532
+ # **📚 How it works**
533
+
534
+ <br/>
535
+
536
+ ## Rasa
537
+
538
+ 1. Rasa handles integration with the communication channel, in this case Telegram.
539
+ - It specifically handles submitting the target webhook user feedback should go through. In our case it is our FastAPI server via `/webhooks/{channel}/webhook`
540
+ 2. Rasa has two components, the core [Rasa app](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa) and a Rasa [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions) that runs separately
541
+ 3. Rasa must be configured (done already) via a few yaml files:
542
+ - [config.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/config.yml) - contains NLU pipeline and policy configuration. What matters is setting the `FallbackClassifier` threshold
543
+ - [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml) - contains the path to our webhook and Telegram credentials. This will get updated by the helper service `rasa-credentials` via [app/rasa-credentials/main.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa-credentials/main.py)
544
+ - [domain.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/domain.yml) - This contains the chat entrypoint logic configuration like intent and the action to take against the intent. Here we add the `action_gpt_fallback` action which will trigger our [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions)
545
+ - [endpoints.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/endpoints.yml) - This is where we set our custom action end-point for Rasa to trigger our fallback
546
+ - [nlu.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml) - this is where we set our intent `out_of_scope`
547
+ - [rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) - we set a rule for this intent that it should trigger the action `action_gpt_fallback`
548
+ - [actions.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) - this is where we define and express our action via the `ActionGPTFallback` class. The method `name` returns the action we defined for our intent above
549
+ 4. Rasa's NLU models must be trained which can be done via CLI with `rasa train` . This is done automatically for you when you run `make install`
550
+ 5. Rasa's core must be run via `rasa run` after training
551
+ 6. Rasa's action server must be run separately with `rasa run actions`
552
+
553
+ <br/>
554
+
555
+ ## Telegram
556
+
557
+ 1. Rasa automatically updates the Telegram Bot API with your callback webhook from [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml).
558
+ 2. By default this is static. Since we are running on our local machine, we leverage [Ngrok](https://ngrok.com/) to generate a publicly accessible URL and reverse tunnel into our docker container
559
+ 3. `rasa-credentials` service takes care of this process for you. Ngrok runs as a service, once it is ready `rasa-credentials` calls the local ngrok API to retrieve the tunnel URL and updates the `credentials.yml` file and restarts Rasa for you
560
+ 4. The webhook Telegram will send messages to will be our FastAPI server. Why this instead of Rasa? Because we want flexibility to capture metadata which Rasa makes a PITA and centralizing to the API server is ideal
561
+ 5. The FastAPI server forwards this to the Rasa webhook
562
+ 6. Rasa will then determine what action to take based on the user intent. Since the intents have been nerfed for this demo, it will go to the fallback action running in `actions.py`
563
+ 7. The custom action will capture the metadata and forward the response from FastAPI to the user
564
+
565
+ <br/>
566
+
567
+ ## PGVector
568
+
569
+ `pgvector` is a plugin for Postgres and automatically installed, enabling you to store and calculate vector data types. We have our own implementation because the Langchain PGVector class is not flexible enough to adapt to our schema and we want flexibility.
570
+
571
+ 1. By default in postgres, any files in the container's path `/docker-entrypoint-initdb.d` get run if the database has not been initialized. In the [postgres Dockerfile](https://github.com/paulpierre/RasaGPT/blob/main/app/db/Dockerfile) we copy [`create_db.sh`](https://github.com/paulpierre/RasaGPT/blob/main/app/db/create_db.sh), which creates the db and user for our database
572
+ 2. In the [`models` command](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/Makefile#L64) in the [Makefile](https://github.com/paulpierre/RasaGPT/blob/main/Makefile), we run the [models.py](https://github.com/paulpierre/RasaGPT/blob/main/app/api/models.py) in the API container which creates the tables from the models.
573
+ 3. The [`enable_vector` method](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/models.py#L266) enables the pgvector extension in the database
574
+
575
+ <br/>
576
+
577
+ ## Langchain
578
+
579
+ 1. The training data gets loaded in the database
580
+ 2. The data is indexed [if the index doesn't exist](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L49) and [stored in a file named `index.json`](https://github.com/paulpierre/RasaGPT/blob/main/app/api/index.json)
581
+ 3. LlamaIndex uses a basic `GPTSimpleVectorIndex` to find the relevant data and [injects it into a prompt](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L66).
582
+ 4. Guard rails via prompts are used to keep the conversation focused
583
+
584
+ <br/>
585
+
586
+ ## Bot flow
587
+
588
+ 1. The user will chat in Telegram and the message will be filtered for [existing intents](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml)
589
+ 2. If it detects there is no intent match but instead matches the `out_of_scope`, [based on rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) it will trigger the `action_gpt_fallback` action
590
+ 3. The [`ActionGPTFallback` function](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) will then call the [FastAPI API server](https://github.com/paulpierre/RasaGPT/blob/main/app/api/main.py)
591
+ 4. The API, using LlamaIndex, will find the relevant indexed content and inject it into a prompt to send to OpenAI for inference
592
+ 5. The prompt contains conversational guardrails including:
593
+ - Requests data be returned in JSON
594
+ - Create categorical tags based on the user's question
595
+ - Return a boolean if the conversation should be escalated to a human (if there is no context match)
596
+
597
+ <br/><br/>
598
+
599
+
600
+ # 📝 TODO
601
+ - [ ] Write tests 😅
602
+ - [ ] Implement LlamaIndex optimizations
603
+ - [ ] Implement chat history
604
+ - [ ] Implement [Query Routers Abstractions](https://medium.com/@jerryjliu98/unifying-llm-powered-qa-techniques-with-routing-abstractions-438e2499a0d0) to understand which search strategy to use (one-shot vs few-shot)
605
+ - [ ] Explore other indexing methods like Tree indexes, Keyword indexes
606
+ - [ ] Add chat history for immediate recall and context setting
607
+ - [ ] Add a secondary adversarial agent ([Dual pattern model](https://simonwillison.net/2023/Apr/25/dual-llm-pattern/)) with the following potential functionalities:
608
+ - [ ] Determine if the question has been answered and if not, re-optimize search strategy
609
+ - [ ] Ensure prompt injection is not occurring
610
+ - [ ] Increase baseline similarity search by exploring:
611
+ - [ ] Regularly generate “fake” document embeddings based on historical queries and link to actual documents via [HyDE pattern](https://wfhbrian.com/revolutionizing-search-how-hypothetical-document-embeddings-hyde-can-save-time-and-increase-productivity/)
612
+ - [ ] Regularly generate “fake” user queries based on documents and link to actual document so user input search and “fake” queries can match better
613
+
614
+
615
+ <br/><br/>
616
+
617
+ # 🔍 Troubleshooting
618
+
619
+ In general, check your docker container logs by simply going to 👉 http://localhost:9999/
620
+
621
+ <br/>
622
+
623
+ ## Ngrok issues
624
+
625
+ Always check that your webhooks with ngrok and Telegram match. Simply do this by
626
+
627
+ ```bash
628
+ curl -sS "https://api.telegram.org/bot<your-bot-secret-token>/getWebhookInfo" | json_pp
629
+ ```
630
+
631
+ <br/>
632
+
633
+ .. should return this:
634
+
635
+ ```bash
636
+ {
637
+ "ok": true,
638
+ "result": {
639
+ "url": "https://b280-04-115-40-112.ngrok-free.app/webhooks/telegram/webhook",
640
+ "has_custom_certificate": false,
641
+ "pending_update_count": 0,
642
+ "max_connections": 40,
643
+ "ip_address": "1.2.3.4"
644
+ }
645
+ }
646
+ ```
647
+
648
+ <br/>
649
+
650
+ .. which should match the URL in your `credentials.yml` file or visit the Ngrok admin UI 👉 [http://localhost:4040/status](http://localhost:4040/status)
651
+
652
+ ![ngrok-admin.png](https://github.com/paulpierre/RasaGPT/blob/main/github/ngrok-admin.png?raw=true)
653
+
654
+ <br/>
655
+
656
+ Looks like it is a match. If not, restart everything by running:
657
+
658
+ ```bash
659
+ make restart
660
+ ```
661
+
662
+ <br/><br/>
663
+
664
+ # 💪 Contributing / Issues
665
+
666
+ - Pull requests welcome
667
+ - Please submit issues via Github, I will do my best to resolve them
668
+ - If you want to get in touch, feel free to hmu on twitter via [`@paulpierre`](https://twitter.com/paulpierre)
669
+
670
+ <br/><br/>
671
+
672
+ > ![thumbsup](https://camo.githubusercontent.com/bcb43227c1e90a1d27996eb75ac794bbf20d1355b36d0e9eaa71c71ad4dd2a56/68747470733a2f2f6d65646961342e67697068792e636f6d2f6d656469612f313149537762674378457a4d79592f67697068792e6769663f6369643d65636630356534376664703164727a72766178733175787532666269376f72316e68626f6d39326d30346436306e786b2665703d76315f676966735f72656c61746564267269643d67697068792e6769662663743d67)
673
+ > <br/> Congratulations, all your base are belong to us! kthxbye
674
+
675
+ <br/><br/>
676
+
677
+ # 📜 Open source license
678
+
679
+ Copyright (c) 2023 Paul Pierre. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
RESULTS.md ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ![RasaGPT Logo](https://github.com/paulpierre/RasaGPT/blob/main/github/rasagpt-logo-1.png?raw=true)
3
+
4
+ <br/><br/>
5
+
6
+ # 📊 RasaGPT Query Results
7
+
8
+ The following are questions based on the example dummy data included in the RasaGPT repository. The response framing is great, but retrieval quality is not sufficient due to indexing. Here are some examples of Q&A hits and misses w/ some commentary.
9
+
10
+ <br/><br/>
11
+
12
+ # ☑️ Hits
13
+
14
+
15
+
16
+ <br/>
17
+
18
+ ### ✅ “Tell me about Pepe Corp”
19
+
20
+ ```bash
21
+ Pepe Corp is a groundbreaking parody pharmaceutical and technology company inspired by the iconic Pepe the Frog meme. Our mission is to revolutionize the world through the power of dank memes.
22
+ 🏷️ company,mission
23
+ ```
24
+
25
+ <br/>
26
+
27
+ ### ✅ “What is the weather like in London today?” (It’s expected to say I don’t know)
28
+
29
+ ```bash
30
+ I apologize, I'm not sure how to help with that
31
+ 🏷️ weather
32
+ ```
33
+
34
+ <br/>
35
+
36
+ ### ✅ “What is the purpose of Kekzal?”
37
+
38
+ ```bash
39
+ Kekzal enhances sexual function.
40
+ 🏷️ purpose
41
+ ```
42
+
43
+ <br/>
44
+
45
+ ### ✅ “What is Kekzal and what are the side effects?”
46
+
47
+ ```bash
48
+ Kekzal is a medication, but the document does not provide information on its side effects. It only advises to consult a memologist if side effects persist or become bothersome.
49
+ 🏷️ Kekzal,side effects
50
+ ```
51
+
52
+ >- It probably should not mention “the document” and this can be optimized by modifying the prompt template
53
+
54
+ <br/>
55
+
56
+ ### ✅ “What should I consider before taking Kekzal?”
57
+
58
+ ```bash
59
+ Before taking Kekzal, consider the following precautions: Do not use Kekzal if you have a known allergy to dank memes or an aversion to Pepe-themed roman.
60
+ 🏷️ precautions
61
+ ```
62
+
63
+ >- This is technically a hit, but because of the chunking settings, the original text gets cut off and says “roman” instead of “romantic..” so this is easily fixed
64
+ >- Ideally it should include “Kekzal” in the tags
65
+
66
+ <br/>
67
+
68
+ ### ✅ “What are the overdose symptoms of Kekzal?”
69
+
70
+ ```bash
71
+ In case of an overdose, symptoms may include: 1. An insatiable desire to create
72
+ 🏷️ overdose,symptoms
73
+ ```
74
+
75
+ - This is a technical hit, but again indexing cut it off and it is likely due to the configured output limits which is easily fixed
76
+
77
+ <br/>
78
+
79
+ ### ✅ “Who is the CEO?”
80
+
81
+ ```bash
82
+ The CEO of Pepe Corp is Amanda Gigglestein.
83
+ 🏷️ CEO
84
+ ```
85
+
86
+ <br/>
87
+
88
+ ### ✅ “What are the 2023 plans of Pepe Corp?”
89
+
90
+ ```bash
91
+ Pepe Corp plans to invest in research and development to create cutting-edge products that address the needs of our diverse customer base. Our R&D team will explore new areas of meme science and technology, developing products that meet the needs of our customers.
92
+ 🏷️ plans,research and development,cutting-edge products,meme science,technology,customer needs
93
+ ```
94
+
95
+ <br/>
96
+
97
+ ### ✅ “What is the revenue for Q1 FY2023?”
98
+
99
+ ```bash
100
+ $235 million
101
+ 🏷️ revenue,Q1,FY2023
102
+ ```
103
+
104
+ <br/>
105
+
106
+ ### ✅ “Tell me about the early days of PepeCorp?”
107
+
108
+ ```bash
109
+ In the early 21st century, a group of visionary memelords gathered in a dimly lit basement, fueled by their passion for dank memes and the iconic Pepe the Frog.
110
+ 🏷️ company history,early days
111
+ ```
112
+
113
+ <br/><br/>
114
+
115
+ # ❌ Misses
116
+
117
+
118
+ ### ❌ “What are the side effects of Kekzal?”
119
+
120
+ ```bash
121
+ Consult your memologist if side effects persist or become bothersome.
122
+ 🏷️ Kekzal,side effects
123
+ ```
124
+
125
+ >- The side effects are in the documentation but it misses it entirely. It did tag the product name unlike earlier sessions, but index optimization can fix this
126
+
127
+ <br/>
128
+
129
+ ### ❌ “Who is Dr. Olivia Dankstein?”
130
+
131
+ ```bash
132
+ I apologize, I'm not sure how to help with that.
133
+ 🏷️ unknown
134
+ ```
135
+
136
+ >- Not good, she is in the Board of Directors document and is Vice-Chair and Chief Scientific Officer of Pepe Corp. Better indexing strategies fix this.
137
+
138
+ <br/>
139
+
140
+
141
+ ### ❌ “What is the list of products provided by Pepe Corp?”
142
+
143
+ ```bash
144
+ Pepe Corp has a diverse portfolio of products, ranging from meme-inspired pharmaceuticals to advanced brain implants and mobile applications.
145
+ 🏷️ products
146
+ ```
147
+
148
+ >- Although it did answer, it is insufficient. This is where a tree index with summarization would help
149
+
150
+ <br/><br/>
151
+
152
+ # 🔬 Retrieval optimizations
153
+
154
+ So how could all of this be improved by what I estimate to be more than 70%+? Here are a few solutions I intend to explore:
155
+
156
+
157
+ - Better index-tuning at the LLM library level, specifically using llama-index instead of Langchain which specializes in retrieval
158
+ - Usage of [Query Routers Abstractions](https://medium.com/@jerryjliu98/unifying-llm-powered-qa-techniques-with-routing-abstractions-438e2499a0d0) to understand which search strategy to use (one-shot vs few-shot)
159
+ - Explore other indexing methods like Tree indexes, Keyword indexes
160
+ - Adding chat history for immediate recall and context setting
161
+ - Add a secondary adversarial agent ([Dual pattern model](https://simonwillison.net/2023/Apr/25/dual-llm-pattern/)) with the following potential functionalities:
162
+ - Determine if the question has been answered and if not, re-optimize search strategy
163
+ - Ensure prompt injection is not occurring
164
+ - Increase baseline similarity search by exploring:
165
+ - Regularly generate “fake” document embeddings based on historical queries and link to actual documents via [HyDE pattern](https://wfhbrian.com/revolutionizing-search-how-hypothetical-document-embeddings-hyde-can-save-time-and-increase-productivity/)
166
+ - Regularly generate “fake” user queries based on documents and link to actual document so user input search and “fake” queries can match better
app/api/.dockerignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
app/api/API.md ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # 🏠 Overview
4
+
5
+ 💬 RasaGPT is the first headless LLM chatbot platform built on top of Rasa and Langchain. It is boilerplate and a reference implementation of Rasa and Telegram utilizing an LLM library like langchain for indexing, retrieval and context injection.
6
+
7
+ <br/><br/>
8
+
9
+ # 💁‍♀️ Why RasaGPT?
10
+
11
+ RasaGPT just works out of the box. I went through all the implementation headaches so you don’t have to, including
12
+
13
+ - Creating your own proprietary bot end-point using FastAPI, document upload and “training” pipeline included
14
+ - How to integrate Langchain/LlamaIndex and Rasa
15
+ - Library conflicts with LLM libraries and passing metadata
16
+ - Dockerized support on MacOS for running Rasa
17
+ - Reverse proxy with chatbots via ngrok
18
+ - Implementing pgvector with your own custom schema instead of using Langchain’s highly opinionated PGVector class
19
+ - Adding multi-tenancy, sessions and metadata between Rasa and your own backend / application
20
+
21
+ The backstory is familiar. A friend came to me with a problem. I scoured Google and Github for a decent reference implementation of LLM’s integrated with Rasa but came up empty-handed. I figured this to be a great opportunity to satiate my curiosity and 2 days later I had a proof of concept, and a week later this is what I came up with.
22
+
23
+ <br/>
24
+
25
+ > ⚠️ **Caveat emptor:**
26
+ This is far from production code and rife with prompt injection and general security vulnerabilities. I just hope someone finds this useful 😊
27
+
28
+
29
+ <br/><br/>
30
+
31
+ # **✨** Quick start
32
+
33
+ Getting started is easy, just make sure you meet the dependencies below.
34
+
35
+ ```bash
36
+ git clone https://github.com/paulpierre/RasaGPT.git
37
+ cd RasaGPT
38
+ cp .env-example .env
39
+
40
+ # Edit your .env file and add all the necessary credentials
41
+ make install
42
+
43
+ # Type "make" to see more options
44
+ make
45
+ ```
46
+
47
+ <br/><br/>
48
+
49
+
50
+ # 🔥 Features
51
+
52
+ ## Full Application and API
53
+
54
+ - LLM “learns” on an arbitrary corpus of data using Langchain
55
+ - Upload documents and “train” all via FastAPI
56
+ - Document versioning and automatic “re-training” implemented on upload
57
+ - Customize your own async end-points
58
+ - Bot determines whether human handoff is necessary
59
+ - Bot generates tags based on user questions and response automatically
60
+ - Full API documentation via Swagger and Redoc included
61
+ - Ngrok end-points are automatically generated for you on startup so your bot can always be accessed via `https://t.me/yourbotname`
62
+ - Embedding similarity search built into Postgres via pgvector and Postgres functions
63
+ - Dummy data included for you to test and experiment
64
+ - Unlimited use cases from help desk, customer support, quiz, e-learning, dungeon and dragons, and more
65
+ <br/><br/>
66
+ ## Rasa integration
67
+
68
+ - Built on top of Rasa, the open source gold-standard for chat platforms
69
+ - Supports MacOS M1/M2 via Docker (canonical Rasa image lacks MacOS arch. support)
70
+ - Supports Telegram, easily integrate Slack, Whatsapp, Line, SMS, etc.
71
+ - Setup complex dialog pipelines using NLU models from Huggingface like BERT or libraries/frameworks like Keras, Tensorflow with OpenAI GPT as fallback
72
+ <br/><br/>
73
+ ## Flexibility
74
+
75
+ - Extend agentic, memory, etc. capabilities with Langchain
76
+ - Schema supports multi-tenancy, sessions, data storage
77
+ - Customize agent personalities
78
+ - Saves all of chat history and creating embeddings from all interactions future-proofing your retrieval strategy
79
+ - Automatically generate embeddings from knowledge base corpus and client feedback
80
+
81
+ <br/><br/>
82
+
83
+ # 🧑‍💻 Installing
84
+
85
+ ## Requirements
86
+
87
+ - Python 3.9
88
+ - Docker & Docker compose ([Docker desktop MacOS](https://www.docker.com/products/docker-desktop/))
89
+ - Open AI [API key](https://platform.openai.com/account/api-keys)
90
+ - Telegram [bot credentials](https://core.telegram.org/bots#how-do-i-create-a-bot)
91
+ - Ngrok [auth token](https://dashboard.ngrok.com/tunnels/authtokens)
92
+ - Make ([MacOS](https://formulae.brew.sh/formula/make)/[Windows](https://stackoverflow.com/questions/32127524/how-to-install-and-use-make-in-windows))
93
+ - SQLModel
94
+
95
+ <br/>
96
+
97
+ ## Setup
98
+
99
+ ```bash
100
+ git clone https://github.com/paulpierre/RasaGPT.git
101
+ cd RasaGPT
102
+ cp .env-example .env
103
+
104
+ # Edit your .env file and all the credentials
105
+
106
+ ```
107
+
108
+ <br/>
109
+
110
+
111
+ At any point feel free to just type in `make` and it will display the list of options, mostly useful for debugging:
112
+
113
+ <br/>
114
+
115
+
116
+ ![Makefile main](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-1.png?raw=true)
117
+
118
+ <br/>
119
+
120
+ ## Docker-compose
121
+
122
+ ```bash
123
+ make install
124
+
125
+ # This will automatically install and run RasaGPT
126
+ # After installation, to run again you can simply run
127
+
128
+ make run
129
+ ```
130
+ <br/>
131
+
132
+ ## Local Python Environment
133
+
134
+ This is useful if you wish to focus on developing on top of the API, a separate `Makefile` was made for this. This will create a local virtual environment for you.
135
+
136
+ ```bash
137
+ # Assuming you are already in the RasaGPT directory
138
+ cd app/api
139
+ make install
140
+
141
+ # This will automatically install and run RasaGPT
142
+ # After installation, to run again you can simply run
143
+
144
+ make run
145
+ ```
146
+ <br/>
147
+
148
+ Similarly, enter `make` to see a full list of commands
149
+
150
+ ![Makefile API](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-2.png?raw=true)
151
+
152
+ <br/>
153
+
154
+ ## Installation process
155
+
156
+ Installation should be automated should look like this:
157
+
158
+ ![Installation](https://github.com/paulpierre/RasaGPT/blob/main/github/cli_install.png?raw=true)
159
+
160
+ 👉 Full installation log: [https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd](https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd)
161
+
162
+ <br/>
163
+
164
+ The installation process for Docker takes the following steps at a high level
165
+
166
+ 1. Check to make sure you have `.env` available
167
+ 2. Database is initialized with `pgvector`
168
+ 3. Database models create the database schema
169
+ 4. Trains the Rasa model so it is ready to run
170
+ 5. Sets up ngrok with Rasa so Telegram has a webhook back to your API server
171
+ 6. Sets up the Rasa actions server so Rasa can talk to the RasaGPT API
172
+ 7. Database is populated with dummy data via `seed.py`
173
+
174
+ <br/><br/>
175
+
176
+ # ☑️ Next steps
177
+ <br/>
178
+
179
+ ## 💬 Start chatting
180
+
181
+ You can start chatting with your bot by visiting https://t.me/yourbotsname
182
+
183
+ ![Telegram](https://github.com/paulpierre/RasaGPT/blob/main/github/telegram.png?raw=true)
184
+
185
+ <br/><br/>
186
+
187
+ ## 👀 View logs
188
+
189
+ You can view all of the logs by visiting [https://localhost:9999/](https://localhost:9999/), which displays real-time logs of all the Docker containers
190
+
191
+ ![Dozzle](https://github.com/paulpierre/RasaGPT/blob/main/github/container_logs.png?raw=true)
192
+
193
+ <br/><br/>
194
+
195
+ ## 📖 API documentation
196
+
197
+ View the API endpoint docs by visiting [https://localhost:8888/docs](https://localhost:8888/docs)
198
+
199
+ ![Swagger Docs](https://github.com/paulpierre/RasaGPT/blob/main/github/api.png?raw=true)
200
+
201
+ <br/><br/>
202
+
203
+ # ✏️ Examples
204
+
205
+ The bot is just a proof-of-concept and has not been optimized for retrieval. It currently uses 1000 character length chunking for indexing and basic euclidean distance for retrieval and quality is hit or miss.
206
+
207
+ You can view example hits and misses with the bot in the [RESULTS.MD](https://github.com/paulpierre/RasaGPT/blob/main/RESULTS.md) file. Overall I estimate index optimization and LLM configuration changes can increase output quality by more than 70%.
208
+
209
+ 👉 Click to see the Q&A results of the demo data in RESULTS.MD
210
+
211
+ <br/><br/>
212
+
213
+ # 💻 API Architecture and Usage
214
+
215
+ The REST API is straight forward, please visit the documentation 👉 http://localhost:8888/docs
216
+
217
+ The entities below have basic CRUD operations and return JSON
218
+
219
+ <br/><br/>
220
+
221
+ ## Organization
222
+
223
+ This can be thought of as a company that is your client in a SaaS / multi-tenant world. By default a list of dummy organizations have been provided
224
+
225
+ ![Screenshot 2023-05-05 at 8.45.28 AM.png](https://github.com/paulpierre/RasaGPT/blob/main/github/orgs.png?raw=true)
226
+
227
+ ```bash
228
+ [
229
+ {
230
+ "id": 1,
231
+ "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452",
232
+ "display_name": "Pepe Corp.",
233
+ "namespace": "pepe",
234
+ "bot_url": null,
235
+ "created_at": "2023-05-05T10:42:45.933976",
236
+ "updated_at": "2023-05-05T10:42:45.933979"
237
+ },
238
+ {
239
+ "id": 2,
240
+ "uuid": "7d574f88-6c0b-4c1f-9368-367956b0e90f",
241
+ "display_name": "Umbrella Corp",
242
+ "namespace": "acme",
243
+ "bot_url": null,
244
+ "created_at": "2023-05-05T10:43:03.555484",
245
+ "updated_at": "2023-05-05T10:43:03.555488"
246
+ },
247
+ {
248
+ "id": 3,
249
+ "uuid": "65105a15-2ef0-4898-ac7a-8eafee0b283d",
250
+ "display_name": "Cyberdine Systems",
251
+ "namespace": "cyberdine",
252
+ "bot_url": null,
253
+ "created_at": "2023-05-05T10:43:04.175424",
254
+ "updated_at": "2023-05-05T10:43:04.175428"
255
+ },
256
+ {
257
+ "id": 4,
258
+ "uuid": "b7fb966d-7845-4581-a537-818da62645b5",
259
+ "display_name": "Bluth Companies",
260
+ "namespace": "bluth",
261
+ "bot_url": null,
262
+ "created_at": "2023-05-05T10:43:04.697801",
263
+ "updated_at": "2023-05-05T10:43:04.697804"
264
+ },
265
+ {
266
+ "id": 5,
267
+ "uuid": "9283d017-b24b-4ecd-bf35-808b45e258cf",
268
+ "display_name": "Evil Corp",
269
+ "namespace": "evil",
270
+ "bot_url": null,
271
+ "created_at": "2023-05-05T10:43:05.102546",
272
+ "updated_at": "2023-05-05T10:43:05.102549"
273
+ }
274
+ ]
275
+ ```
276
+
277
+ <br/>
278
+
279
+ ### Project
280
+
281
+ This can be thought of as a product that belongs to a company. You can view the list of projects that belong to an organizations like so:
282
+
283
+ ![org-projects.png](https://github.com/paulpierre/RasaGPT/blob/main/github/org-projects.png?raw=true)
284
+
285
+ ```bash
286
+ [
287
+ {
288
+ "id": 1,
289
+ "documents": [
290
+ {
291
+ "id": 1,
292
+ "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7",
293
+ "display_name": "project-pepetamine.md",
294
+ "node_count": 3
295
+ }
296
+ ],
297
+ "document_count": 1,
298
+ "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87",
299
+ "display_name": "Pepetamine",
300
+ "created_at": "2023-05-05T10:42:46.060930",
301
+ "updated_at": "2023-05-05T10:42:46.060934"
302
+ },
303
+ {
304
+ "id": 2,
305
+ "documents": [
306
+ {
307
+ "id": 2,
308
+ "uuid": "b408595a-3426-4011-9b9b-8e260b244f74",
309
+ "display_name": "project-frogonil.md",
310
+ "node_count": 3
311
+ }
312
+ ],
313
+ "document_count": 1,
314
+ "uuid": "5ba6b812-de37-451d-83a3-8ccccadabd69",
315
+ "display_name": "Frogonil",
316
+ "created_at": "2023-05-05T10:42:48.043936",
317
+ "updated_at": "2023-05-05T10:42:48.043940"
318
+ },
319
+ {
320
+ "id": 3,
321
+ "documents": [
322
+ {
323
+ "id": 3,
324
+ "uuid": "b99d373a-3317-4699-a89e-90897ba00db6",
325
+ "display_name": "project-kekzal.md",
326
+ "node_count": 3
327
+ }
328
+ ],
329
+ "document_count": 1,
330
+ "uuid": "1be4360c-f06e-4494-bf20-e7c73a56f003",
331
+ "display_name": "Kekzal",
332
+ "created_at": "2023-05-05T10:42:49.092675",
333
+ "updated_at": "2023-05-05T10:42:49.092678"
334
+ },
335
+ {
336
+ "id": 4,
337
+ "documents": [
338
+ {
339
+ "id": 4,
340
+ "uuid": "94da307b-5993-4ddd-a852-3d8c12f95f3f",
341
+ "display_name": "project-memetrex.md",
342
+ "node_count": 3
343
+ }
344
+ ],
345
+ "document_count": 1,
346
+ "uuid": "1fd7e772-365c-451b-a7eb-4d529b0927f0",
347
+ "display_name": "Memetrex",
348
+ "created_at": "2023-05-05T10:42:50.184817",
349
+ "updated_at": "2023-05-05T10:42:50.184821"
350
+ },
351
+ {
352
+ "id": 5,
353
+ "documents": [
354
+ {
355
+ "id": 5,
356
+ "uuid": "6deff180-3e3e-4b09-ae5a-6502d031914a",
357
+ "display_name": "project-pepetrak.md",
358
+ "node_count": 4
359
+ }
360
+ ],
361
+ "document_count": 1,
362
+ "uuid": "a389eb58-b504-48b4-9bc3-d3c93d2fbeaa",
363
+ "display_name": "PepeTrak",
364
+ "created_at": "2023-05-05T10:42:51.293352",
365
+ "updated_at": "2023-05-05T10:42:51.293355"
366
+ },
367
+ {
368
+ "id": 6,
369
+ "documents": [
370
+ {
371
+ "id": 6,
372
+ "uuid": "2e3c2155-cafa-4c6b-b7cc-02bb5156715b",
373
+ "display_name": "project-memegen.md",
374
+ "node_count": 5
375
+ }
376
+ ],
377
+ "document_count": 1,
378
+ "uuid": "cec4154f-5d73-41a5-a764-eaf62fc3db2c",
379
+ "display_name": "MemeGen",
380
+ "created_at": "2023-05-05T10:42:52.562037",
381
+ "updated_at": "2023-05-05T10:42:52.562040"
382
+ },
383
+ {
384
+ "id": 7,
385
+ "documents": [
386
+ {
387
+ "id": 7,
388
+ "uuid": "baabcb6f-e14c-4d59-a019-ce29973b9f5c",
389
+ "display_name": "project-neurokek.md",
390
+ "node_count": 5
391
+ }
392
+ ],
393
+ "document_count": 1,
394
+ "uuid": "4a1a0542-e314-4ae7-9961-720c2d092f04",
395
+ "display_name": "Neuro-kek",
396
+ "created_at": "2023-05-05T10:42:53.689537",
397
+ "updated_at": "2023-05-05T10:42:53.689539"
398
+ },
399
+ {
400
+ "id": 8,
401
+ "documents": [
402
+ {
403
+ "id": 8,
404
+ "uuid": "5be007ec-5c89-4bc4-8bfd-448a3659c03c",
405
+ "display_name": "org-about_the_company.md",
406
+ "node_count": 5
407
+ },
408
+ {
409
+ "id": 9,
410
+ "uuid": "c2b3fb39-18c0-4f3e-9c21-749b86942cba",
411
+ "display_name": "org-board_of_directors.md",
412
+ "node_count": 3
413
+ },
414
+ {
415
+ "id": 10,
416
+ "uuid": "41aa81a9-13a9-4527-a439-c2ac0215593f",
417
+ "display_name": "org-company_story.md",
418
+ "node_count": 4
419
+ },
420
+ {
421
+ "id": 11,
422
+ "uuid": "91c59eb8-8c05-4f1f-b09d-fcd9b44b5a20",
423
+ "display_name": "org-corporate_philosophy.md",
424
+ "node_count": 4
425
+ },
426
+ {
427
+ "id": 12,
428
+ "uuid": "631fc3a9-7f5f-4415-8283-78ff582be483",
429
+ "display_name": "org-customer_support.md",
430
+ "node_count": 3
431
+ },
432
+ {
433
+ "id": 13,
434
+ "uuid": "d4c3d3db-6f24-433e-b2aa-52a70a0af976",
435
+ "display_name": "org-earnings_fy2023.md",
436
+ "node_count": 5
437
+ },
438
+ {
439
+ "id": 14,
440
+ "uuid": "08dd478b-414b-46c4-95c0-4d96e2089e90",
441
+ "display_name": "org-management_team.md",
442
+ "node_count": 3
443
+ }
444
+ ],
445
+ "document_count": 7,
446
+ "uuid": "1d2849b4-2715-4dcf-aa68-090a221942ba",
447
+ "display_name": "Pepe Corp. (company)",
448
+ "created_at": "2023-05-05T10:42:55.258902",
449
+ "updated_at": "2023-05-05T10:42:55.258904"
450
+ }
451
+ ]
452
+ ```
453
+
454
+ <br/>
455
+
456
+ ## Document
457
+
458
+ This can be thought of as an artifact related to a product, like an FAQ page or a PDF with financial statement earnings. You can view all the Documents associated with an Organization’s Project like so:
459
+
460
+ ![documents.png](https://github.com/paulpierre/RasaGPT/blob/main/github/documents.png?raw=true)
461
+
462
+ ```bash
463
+ {
464
+ "id": 1,
465
+ "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87",
466
+ "organization": {
467
+ "id": 1,
468
+ "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452",
469
+ "display_name": "Pepe Corp.",
470
+ "bot_url": null,
471
+ "status": 2,
472
+ "created_at": "2023-05-05T10:42:45.933976",
473
+ "updated_at": "2023-05-05T10:42:45.933979",
474
+ "namespace": "pepe"
475
+ },
476
+ "document_count": 1,
477
+ "documents": [
478
+ {
479
+ "id": 1,
480
+ "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7",
481
+ "organization_id": 1,
482
+ "project_id": 1,
483
+ "display_name": "project-pepetamine.md",
484
+ "url": "",
485
+ "data": "# Pepetamine\n\nProduct Name: Pepetamine\n\nPurpose: Increases cognitive focus just like the Limitless movie\n\n**How to Use**\n\nPepetamine is available in the form of rare Pepe-coated tablets. The recommended dosage is one tablet per day, taken orally with a glass of water, preferably while browsing your favorite meme forum for maximum cognitive enhancement. For optimal results, take Pepetamine 30 minutes before engaging in mentally demanding tasks, such as decoding ancient Pepe hieroglyphics or creating your next viral meme masterpiece.\n\n**Side Effects**\n\nSome potential side effects of Pepetamine may include:\n\n1. Uncontrollable laughter and a sudden appreciation for dank memes\n2. An inexplicable desire to collect rare Pepes\n3. Enhanced meme creation skills, potentially leading to internet fame\n4. Temporary green skin pigmentation, resembling the legendary Pepe himself\n5. Spontaneously speaking in \"feels good man\" language\n\nWhile most side effects are generally harmless, consult your memologist if side effects persist or become bothersome.\n\n**Precautions**\n\nBefore taking Pepetamine, please consider the following precautions:\n\n1. Do not use Pepetamine if you have a known allergy to rare Pepes or dank memes.\n2. Pepetamine may not be suitable for individuals with a history of humor deficiency or meme intolerance.\n3. Exercise caution when driving or operating heavy machinery, as Pepetamine may cause sudden fits of laughter or intense meme ideation.\n\n**Interactions**\n\nPepetamine may interact with other substances, including:\n\n1. Normie supplements: Combining Pepetamine with normie supplements may result in meme conflicts and a decreased sense of humor.\n2. 
Caffeine: The combination of Pepetamine and caffeine may cause an overload of energy, resulting in hyperactive meme creation and potential internet overload.\n\nConsult your memologist if you are taking any other medications or substances to ensure compatibility with Pepetamine.\n\n**Overdose**\n\nIn case of an overdose, symptoms may include:\n\n1. Uncontrollable meme creation\n2. Delusions of grandeur as the ultimate meme lord\n3. Time warps into the world of Pepe\n\nIf you suspect an overdose, contact your local meme emergency service or visit the nearest meme treatment facility. Remember, the key to enjoying Pepetamine is to use it responsibly, and always keep in mind the wise words of our legendary Pepe: \"Feels good man.\"",
486
+ "hash": "fdee6da2b5441080dd78e7850d3d2e1403bae71b9e0526b9dcae4c0782d95a78",
487
+ "version": 1,
488
+ "status": 2,
489
+ "created_at": "2023-05-05T10:42:46.755428",
490
+ "updated_at": "2023-05-05T10:42:46.755431"
491
+ }
492
+ ],
493
+ "display_name": "Pepetamine",
494
+ "created_at": "2023-05-05T10:42:46.060930",
495
+ "updated_at": "2023-05-05T10:42:46.060934"
496
+ }
497
+ ```
498
+
499
+ <br/>
500
+
501
+ ## Node
502
+
503
+ Although this is not exposed in the API, a node is a chunk of a document which embeddings get generated for. Nodes are used for retrieval search as well as context injection. A node belongs to a document.
504
+
505
+ <br/>
506
+
507
+ ## User
508
+
509
+ A user represents the person talking to a bot. Users do not necessarily belong to an org or product, but this relationship is captured in ChatSession below.
510
+
511
+ <br/>
512
+
513
+ ## ChatSession
514
+
515
+ Not exposed via API, but this represents a question and answer between the User and a bot. Each of these objects can be flexibly identified by a `session_id` which gets automatically generated. Chat Sessions contain rich metadata that can be used for training and optimization. ChatSessions via the `/chat` endpoint ARE in fact associated with an organization (for multi-tenant security purposes)
516
+
517
+ <br/><br/>
518
+
519
+ # **📚 How it works**
520
+
521
+ <br/>
522
+
523
+ ## Rasa
524
+
525
+ 1. Rasa handles integration with the communication channel, in this case Telegram.
526
+ - It specifically handles submitting the target webhook user feedback should go through. In our case it is our FastAPI server via `/webhooks/{channel}/webhook`
527
+ 2. Rasa has two components, the core [Rasa app](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa) and an Rasa [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions) that runs separately
528
+ 3. Rasa must be configured (done already) via a few yaml files:
529
+ - [config.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/config.yml) - contains NLU pipeline and policy configuration. What matters is setting the `FallbackClassifier` threshold
530
+ - [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml) - contains the path to our webhook and Telegram credentials. This will get updated by the helper service `rasa-credentials` via [update_credentials.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/update_credentials.py)
531
+ - [domain.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/domain.yml) - This contains the chat entrypoint logic configuration like intent and the action to take against the intent. Here we add the `action_gpt_fallback` action which will trigger our [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions)
532
+ - [endpoints.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/endpoints.yml) - This is where we set our custom action end-point for Rasa to trigger our fallback
533
+ - [nlu.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml) - this is where we set our intent `out_of_scope`
534
+ - [rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) - we set a rule for this intent that it should trigger the action `action_gpt_fallback`
535
+ - [actions.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) - this is where we define and express our action via the `ActionGPTFallback` class. The method `name` returns the action we defined for our intent above
536
+ 4. Rasa's NLU models must be trained which can be done via CLI with `rasa train` . This is done automatically for you when you run `make install`
537
+ 5. Rasa's core must be ran via `rasa run` after training
538
+ 6. Rasa's action server must be ran separately with `rasa run actions`
539
+
540
+ <br/>
541
+
542
+ ## Telegram
543
+
544
+ 1. Rasa automatically updates the Telegram Bot API with your callback webhook from [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml).
545
+ 2. By default this is static. Since we are running on our local machine, we leverage [Ngrok](https://ngrok.com/) to generate a publicly accessible URL and reverse tunnel into our docker container
546
+ 3. `rasa-credentials` service takes care of this process for you. Ngrok runs as a service, once it is ready `rasa-credentials` calls the local ngrok API to retrieve the tunnel URL and updates the `credentials.yml` file and restarts Rasa for you
547
+ 4. The webhook Telegram will send messages to will be our FastAPI server. Why this instead of Rasa? Because we want flexibility to capture metadata which Rasa makes a PITA and centralizing to the API server is ideal
548
+ 5. The FastAPI server forwards this to the Rasa webhook
549
+ 6. Rasa will then determine what action to take based on the user intent. Since the intents have been nerfed for this demo, it will go to the fallback action running in `actions.py`
550
+ 7. The custom action will capture the metadata and forward the response from FastAPI to the user
551
+
552
+ <br/>
553
+
554
+ ## PGVector
555
+
556
+ `pgvector` is a plugin for Postgres that is automatically installed, enabling you to store and calculate vector data types. We have our own implementation because the Langchain PGVector class is not flexible enough to adapt to our schema and we want flexibility.
557
+
558
+ 1. By default in postgres, any files in the container's path `/docker-entrypoint-initdb.d` get run if the database has not been initialized. In the [postgres Dockerfile](https://github.com/paulpierre/RasaGPT/blob/main/app/db/Dockerfile) we copy [create_db.sh](https://github.com/paulpierre/RasaGPT/blob/main/app/db/create_db.sh), which creates the db and user for our database
559
+ 2. In the [`models` command](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/Makefile#L64) in the [Makefile](https://github.com/paulpierre/RasaGPT/blob/main/Makefile), we run the [models.py](https://github.com/paulpierre/RasaGPT/blob/main/app/api/models.py) in the API container which creates the tables from the models.
560
+ 3. The [`enable_vector` method](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/models.py#L266) enables the pgvector extension in the database
561
+
562
+ <br/>
563
+
564
+ ## Langchain
565
+
566
+ 1. The training data gets loaded in the database
567
+ 2. The data is indexed [if the index doesn't exist](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L49) and [stored in a file named `index.json`](https://github.com/paulpierre/RasaGPT/blob/main/app/api/index.json)
568
+ 3. LlamaIndex uses a basic `GPTSimpleVectorIndex` to find the relevant data and [injects it into a prompt](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L66).
569
+ 4. Guard rails via prompts are used to keep the conversation focused
570
+
571
+ <br/>
572
+
573
+ ## Bot flow
574
+
575
+ 1. The user will chat in Telegram and the message will be filtered for [existing intents](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml)
576
+ 2. If it detects there is no intent match but instead matches the `out_of_scope`, [based on rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) it will trigger the `action_gpt_fallback` action
577
+ 3. The [`ActionGPTFallback` function](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) will then call the [FastAPI API server](https://github.com/paulpierre/RasaGPT/blob/main/app/api/main.py)
578
+ 4. the API using LlamaIndex will find the relevant indexed content and inject it into a prompt to send to OpenAI for inference
579
+ 5. The prompt contains conversational guardrails including:
580
+ - Requests data be returned in JSON
581
+ - Create categorical tags based on the user's question
582
+ - Return a boolean if the conversation should be escalated to a human (if there is no context match)
583
+
584
+ <br/><br/>
585
+
586
+
587
+ # 📝 TODO
588
+
589
+ - [ ] Implement LlamaIndex optimizations
590
+ - [ ] Implement chat history
591
+ - [ ] Implement [Query Routers Abstractions](https://medium.com/@jerryjliu98/unifying-llm-powered-qa-techniques-with-routing-abstractions-438e2499a0d0) to understand which search strategy to use (one-shot vs few-shot)
592
+ - [ ] Explore other indexing methods like Tree indexes, Keyword indexes
593
+ - [ ] Add chat history for immediate recall and context setting
594
+ - [ ] Add a secondary adversarial agent ([Dual pattern model](https://simonwillison.net/2023/Apr/25/dual-llm-pattern/)) with the following potential functionalities:
595
+ - [ ] Determine if the question has been answered and if not, re-optimize search strategy
596
+ - [ ] Ensure prompt injection is not occurring
597
+ - [ ] Increase baseline similarity search by exploring:
598
+ - [ ] Regularly generate “fake” document embeddings based on historical queries and link to actual documents via [HyDE pattern](https://wfhbrian.com/revolutionizing-search-how-hypothetical-document-embeddings-hyde-can-save-time-and-increase-productivity/)
599
+ - [ ] Regularly generate “fake” user queries based on documents and link to actual document so user input search and “fake” queries can match better
600
+
601
+
602
+ <br/><br/>
603
+
604
+ # 🔍 Troubleshooting
605
+
606
+ In general, check your docker container logs by simply going to 👉 http://localhost:9999/
607
+
608
+ <br/>
609
+
610
+ ## Ngrok issues
611
+
612
+ Always check that your webhooks with ngrok and Telegram match. Simply do this by
613
+
614
+ ```bash
615
+ curl -sS "https://api.telegram.org/bot<your-bot-secret-token>/getWebhookInfo" | json_pp
616
+ ```
617
+
618
+ <br/>
619
+
620
+ .. should return this:
621
+
622
+ ```bash
623
+ {
624
+ "ok": true,
625
+ "result": {
626
+ "url": "https://b280-04-115-40-112.ngrok-free.app/webhooks/telegram/webhook",
627
+ "has_custom_certificate": false,
628
+ "pending_update_count": 0,
629
+ "max_connections": 40,
630
+ "ip_address": "1.2.3.4"
631
+ }
632
+ }
633
+ ```
634
+
635
+ <br/>
636
+
637
+ .. which should match the URL in your `credentials.yml` file or visit the Ngrok admin UI 👉 [http://localhost:4040/status](http://localhost:4040/status)
638
+
639
+ ![ngrok-admin.png](https://github.com/paulpierre/RasaGPT/blob/main/github/ngrok-admin.png?raw=true)
640
+
641
+ <br/>
642
+
643
+ Looks like it is a match. If not, restart by everything by running:
644
+
645
+ ```bash
646
+ make restart
647
+ ```
648
+
649
+ <br/><br/>
650
+
651
+ # 💪 Contributing / Issues
652
+
653
+ - Pull requests welcome
654
+ - Please submit issues via Github, I will do my best to resolve them
655
+ - If you want to get in touch, feel free to hit me up on Twitter via [@paulpierre](https://twitter.com/paulpierre)
656
+
657
+ <br/><br/>
658
+
659
+ > ![thumbsup](https://camo.githubusercontent.com/bcb43227c1e90a1d27996eb75ac794bbf20d1355b36d0e9eaa71c71ad4dd2a56/68747470733a2f2f6d65646961342e67697068792e636f6d2f6d656469612f313149537762674378457a4d79592f67697068792e6769663f6369643d65636630356534376664703164727a72766178733175787532666269376f72316e68626f6d39326d30346436306e786b2665703d76315f676966735f72656c61746564267269643d67697068792e6769662663743d67)
660
+ > <br/> Congratulations, all your base are belong to us! kthxbye
661
+
662
+ <br/><br/>
663
+
664
+ # 📜 Open source license
665
+
666
+ Copyright (c) 2023 Paul Pierre. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
app/api/Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the RasaGPT FastAPI server (uvicorn on port 8888).
FROM python:3.9

# Set the path to the API directory
WORKDIR /app/api

# Copy the codebase into the container
COPY . .

# Install the requirements
RUN pip install --no-cache-dir -r requirements.txt

# Expose the port
EXPOSE 8888

# NOTE(review): --host is set to "api" (presumably the docker-compose service
# name) rather than the conventional 0.0.0.0 — confirm that "api" resolves to
# an interface inside this container. --reload enables dev-mode autoreload.
ENTRYPOINT ["uvicorn", "main:app", "--host", "api", "--port", "8888", "--reload"]
app/api/Makefile ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Every target here is a command, not a file product, so all of them are
# declared phony. FIX: help, install-seed, env-create and api-install were
# previously missing from this list.
.PHONY: default banner help install install-seed run stop db-purge env env-create db db-stop models api api-install api-stop seed

# Running bare `make` shows the help menu.
default: help

# Print the core command menu.
help:
	make banner
	@echo "+------------------+"
	@echo "| 🏠 CORE COMMANDS |"
	@echo "+------------------+"
	@echo "make install - Setup environment and build models"
	@echo "make install-seed - Setup environment, build models and seed database"
	@echo "make seed - Seed database with dummy data"
	@echo "make run - Run database and API server"
	@echo "make stop - Stop database and API server"
	@echo "make db-purge - Delete all data in database\n"

# ASCII-art banner printed before most commands.
banner:
	@echo "\n\n-------------------------------------"
	@echo "▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀"
	@echo "▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░"
	@echo "▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░"
	@echo "+-----------------------------------+"
	@echo "| 🤖 FastAPI LLM server |"
	@echo "+-----------------------------------+\n\n"
25
+
26
# ================
# 🏠 CORE COMMANDS
# ================

# ----------------------------------
# Setup environment and build models
# ----------------------------------
# Creates the venv, builds the DB schema, then leaves the DB container as-is.
install:
	@make banner
	@make api-install
	@make db-stop
	@echo "✅ Installation complete. Run 'make run' to start services.\n"

# ----------------------------------------------
# Setup environment and build models and seed DB
# ----------------------------------------------
# Same as `install`, plus populating the database with demo data.
install-seed:
	@make banner
	@make api-install
	@make seed
	@make db-stop
	@echo "✅ Installation complete. Run 'make run' to start services.\n"


# Populate the database with dummy data via seed.py (expects a running DB).
seed:
	@make banner
	@echo "🌱 Seeding database ..\n"
	@python3 seed.py
54
+
55
# ---------------------------
# Run database and API server
# ---------------------------
# NOTE(review): `make env` activates the venv inside a sub-make, so the
# activation does not persist into the `api` recipe below — confirm intended.
run:
	@make banner
	@make env
	@echo "🚀 Starting services ..\n"
	@make db
	@make api
64
+
65
# ----------------------------
# Stop database and API server
# ----------------------------
# Stops the Postgres container and kills the local uvicorn process.
stop:
	@make banner
	@echo "🛑 Stopping services ..\n"
	@make db-stop
	@make api-stop
73
+
74
# ---------------------------
# Delete all data in database
# ---------------------------
# Prompts for confirmation, stops the DB, then removes the data directory
# mounted at ../../mnt. Irreversible.
db-purge:
	@echo "⛔ Are you sure you want to delete all data in the database? [y/N]\n"
	@read confirmation; \
	if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \
		echo "Deleting generated files..."; \
		make db-stop; \
		rm -rf ../../mnt; \
		echo "Deleted."; \
	else \
		echo "Aborted."; \
	fi
88
+
89
# ==================
# 💁 HELPER COMMANDS
# ==================

# --------------
# Install helper
# --------------
# Creates the venv, restarts the DB container, then builds the schema.
api-install:
	@echo "🏗️ Installing services ..\n"
	@make env-create
	@make db-stop
	@make db
	@make models
102
+
103
# -------------------
# Create a virtualenv
# -------------------
# Creates ./venv and installs requirements on first run; on subsequent runs
# just activates the existing environment.
# BUG FIX: `source` is a bash-ism that fails under make's default /bin/sh —
# the portable `.` builtin is used instead (also fixes a typo in the output).
env-create:
	@echo "Creating virtual environment .."
	@if [ -d "./venv" ]; then \
		echo "Environment and dependencies created already, loading .."; \
		. ./venv/bin/activate; \
	else \
		echo "Install dependencies .."; \
		python3 -m venv venv; \
		. venv/bin/activate; \
		pip3 install -r requirements.txt; \
	fi
117
+
118
# ---------------------------
# Load an existing virtualenv
# ---------------------------
# BUG FIX: `source` is not available in make's default /bin/sh shell; the
# portable `.` builtin is used instead.
env:
	@echo "loading virtual environment if exists"
	@if [ -d "./venv" ]; then \
		. ./venv/bin/activate; \
	fi
126
+
127
# -----------------
# Start Postgres DB
# -----------------
# Starts only the `db` service from the repo-root docker-compose file.
db:
	@echo "Starting Postgres with pgvector .."
	@cd ../../ && docker-compose -f docker-compose.yml up -d db
133
+
134
# -----------------
# Stop Postgres DB
# -----------------
# BUG FIX: this recipe previously ran `up -d db` (copy-pasted from the `db`
# target), which STARTED the database instead of stopping it.
db-stop:
	@cd ../../ && docker-compose -f docker-compose.yml stop db
139
+
140
# ------------
# Build models
# ------------
# HACK: sleeps 60s to give the Postgres container time to accept connections
# before creating the schema — there is no real readiness check here.
models:
	@echo "💽 Building models in database .."
	@sleep 60 && python3 models.py
146
+
147
# -----------------
# Start FastAPI API
# -----------------
# Brief sleep so the DB container started by `make run` can come up first.
api:
	@echo "⚡ Starting FastAPI API server .."
	@sleep 5 && uvicorn main:app --port 8888 --reload

# -----------------
# Stop FastAPI API
# -----------------
# NOTE(review): killall terminates EVERY uvicorn process on the host, not
# just this project's server.
api-stop:
	@echo "🛑 Stopping FastAPI server .."
	@killall uvicorn
app/api/config.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum, IntEnum
2
+ from dotenv import load_dotenv
3
+ from pathlib import Path
4
+ import logging
5
+ import sys
6
+ import os
7
+
8
# -------
# Logging
# -------

# Configure the root logger once. basicConfig already attaches a stdout
# StreamHandler, so the extra addHandler call that used to follow it
# duplicated every log line; it has been removed.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logger = logging.getLogger(__name__)
15
+
16
# ----------------
# Environment vars
# ----------------
env = os.getenv("ENV", None)

if not env:
    # No explicit ENV set: fall back to the repo-root .env file
    # (three levels up: app/api/config.py -> repo root).
    env_file = Path(__file__).parent.parent.parent / '.env'
    logger.debug(f"Loading env file: {env_file}")
    if os.path.exists(env_file):
        load_dotenv(dotenv_path=env_file)
    else:
        # BUG FIX: the message previously interpolated `env` (always None on
        # this branch) with a stray ")" — report the actual missing path.
        raise Exception(f"Env file {env_file} not found")
29
+
30
# -----------------------
# Configuration constants
# -----------------------
# Markdown API reference rendered into the FastAPI docs description below.
readme_file = Path(__file__).parent / "API.md"

# Inline API.md as a collapsible <details> section when the file exists,
# otherwise an empty string. read_text() is only evaluated after the
# exists() check succeeds (conditional-expression semantics).
readme_str = (
    f"""
<details>
<summary>📕 API.MD</summary>
{readme_file.read_text()}

</details>

"""
    if readme_file.exists()
    else ""
)
APP_NAME = "API Documentation"
APP_VERSION = "0.0.1"
# Markdown shown on the FastAPI /docs landing page.
APP_DESCRIPTION = f"""
![img](/static/img/rasagpt-logo-1.png)

---
## About
💬 RasaGPT is the first headless LLM chatbot platform built on top of Rasa and Langchain

- 📚 Resources: [https://rasagpt.dev](https://rasagpt.dev)
- 🧑‍💻 Github: [https://github.com/paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT)
- 🧙 Author: [@paulpierre](https://twitter.com/paulpierre)

{readme_str}
"""
# NOTE(review): these point at /public/... while APP_DESCRIPTION above uses
# /static/... — confirm which mount actually serves the images.
APP_ICON = "/public/img/rasagpt-icon-200x200.png"
APP_LOGO = "/public/img/rasagpt-logo-1.png"
64
+
65
FILE_UPLOAD_PATH = os.getenv("FILE_UPLOAD_PATH", "/tmp")

# Database configurations
# Superuser credentials (used by the SU_DSN below); sensible local defaults.
POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "postgres")
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", 5432)
# Application credentials have no defaults — they must come from the env.
DB_USER = os.getenv("DB_USER")
DB_NAME = os.getenv("DB_NAME")
DB_PASSWORD = os.getenv("DB_PASSWORD")
# Application DSN and superuser DSN for the same database.
DSN = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
SU_DSN = (
    f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# Rasa core webhook endpoint (default host is the compose service name).
RASA_WEBHOOK_HOST = os.getenv("RASA_WEBHOOK_HOST", "rasa-core")
RASA_WEBHOOK_PORT = os.getenv("RASA_WEBHOOK_PORT", 5005)
RASA_WEBHOOK_URL = f"http://{RASA_WEBHOOK_HOST}:{RASA_WEBHOOK_PORT}"

# LLM configurations
MODEL_NAME = os.getenv("MODEL_NAME")
LLM_DEFAULT_TEMPERATURE = float(os.getenv("LLM_DEFAULT_TEMPERATURE", 0.0))
# Document chunking parameters for indexing — units (chars vs tokens) depend
# on the indexer; TODO confirm.
LLM_CHUNK_SIZE = int(os.getenv("LLM_CHUNK_SIZE", 512))
LLM_CHUNK_OVERLAP = int(os.getenv("LLM_CHUNK_OVERLAP", 20))
# Maximum vector distance for a retrieval hit — presumably in the units of
# the configured distance strategy; verify against the query code.
LLM_DISTANCE_THRESHOLD = float(os.getenv("LLM_DISTANCE_THRESHOLD", 0.5))
LLM_MAX_OUTPUT_TOKENS = int(os.getenv("LLM_MAX_OUTPUT_TOKENS", 256))
LLM_MIN_NODE_LIMIT = int(os.getenv("LLM_MIN_NODE_LIMIT", 3))
92
+
93
+
94
class DISTANCE_STRATEGY(Enum):
    """Vector-distance metrics available for pgvector similarity search."""

    COSINE = "cosine"
    EUCLIDEAN = "euclidean"
    MAX_INNER_PRODUCT = "max_inner_product"

    def __new__(cls, strategy_name: str):
        # Store the metric's string name as the member's canonical value so
        # DISTANCE_STRATEGY("cosine") round-trips to the member.
        member = object.__new__(cls)
        member._value_ = strategy_name
        return member

    @property
    def strategy_name(self) -> str:
        """Convenience alias for the member's string value."""
        return self.value
107
+
108
+
109
# (strategy, name, pgvector operator, index-creation DDL) tuples used when
# building similarity queries and optional ivfflat indexes on `node`.
DISTANCE_STRATEGIES = [
    (
        DISTANCE_STRATEGY.EUCLIDEAN,
        "euclidean",
        "<->",
        "CREATE INDEX ON node USING ivfflat (embeddings vector_l2_ops) WITH (lists = 100);",
    ),
    (
        DISTANCE_STRATEGY.COSINE,
        "cosine",
        "<=>",
        "CREATE INDEX ON node USING ivfflat (embeddings vector_cosine_ops) WITH (lists = 100);",
    ),
    (
        DISTANCE_STRATEGY.MAX_INNER_PRODUCT,
        "max_inner_product",
        "<#>",
        "CREATE INDEX ON node USING ivfflat (embeddings vector_ip_ops) WITH (lists = 100);",
    ),
]
# Looked up by enum member NAME (e.g. "COSINE"), not by value.
LLM_DEFAULT_DISTANCE_STRATEGY = DISTANCE_STRATEGY[
    os.getenv("LLM_DEFAULT_DISTANCE_STRATEGY", "COSINE")
]
# Dimensionality of the embedding vectors stored in pgvector.
VECTOR_EMBEDDINGS_COUNT = 1536
# NOTE(review): any non-empty string (including "false" or "0") enables this
# flag — confirm whether stricter truthiness parsing is wanted.
PGVECTOR_ADD_INDEX = True if os.getenv("PGVECTOR_ADD_INDEX", False) else False
# Model constants

DOCUMENT_TYPE = IntEnum("DOCUMENT_TYPE", ["PLAINTEXT", "MARKDOWN", "HTML", "PDF"])

# BUG FIX: a missing comma previously concatenated "BANNED" "DEPRECATED" into
# one bogus "BANNEDDEPRECATED" member, so neither status actually existed.
ENTITY_STATUS = IntEnum(
    "ENTITY_STATUS",
    ["UNVERIFIED", "ACTIVE", "INACTIVE", "DELETED", "BANNED", "DEPRECATED"],
)
CHANNEL_TYPE = IntEnum(
    "CHANNEL_TYPE", ["SMS", "TELEGRAM", "WHATSAPP", "EMAIL", "WEBSITE"]
)

# Display names assigned to the bot's support-agent persona.
AGENT_NAMES = [
    "Aisha",
    "Lilly",
    "Hanna",
    "Julia",
    "Emily",
    "Sophia",
    "Alex",
    "Isabella",
]
156
+
157
+
158
class LLM_MODELS(Enum):
    """OpenAI model identifiers paired with their context-window token limits.

    Each member's value is a (model_name, token_limit) tuple; Enum passes the
    tuple elements to __init__, which stashes them on private attributes.
    """

    TEXT_DAVINCI_003 = "text-davinci-003", 4097
    GPT_35_TURBO = "gpt-3.5-turbo", 4096
    TEXT_DAVINCI_002 = "text-davinci-002", 4097
    CODE_DAVINCI_002 = "code-davinci-002", 8001
    GPT_4 = "gpt-4", 8192
    GPT_4_32K = "gpt-4-32k", 32768

    def __init__(self, model_name: str, token_limit: int) -> None:
        self._model_name = model_name
        self._token_limit = token_limit

    @property
    def model_name(self) -> str:
        """The model name string sent to the OpenAI API."""
        return self._model_name

    @property
    def token_limit(self) -> int:
        """Maximum context-window size, in tokens, for this model."""
        return self._token_limit
app/api/data/training_data/org-about_the_company.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # About the company
2
+
3
+ Title: About Pepe Corp
4
+
5
+ **Introduction**
6
+
7
+ Pepe Corp is a groundbreaking parody pharmaceutical and technology company inspired by the iconic Pepe the Frog meme. Our mission is to revolutionize the world through the power of dank memes, laughter, and innovation. By merging the realms of science and meme culture, we create cutting-edge products that entertain, amaze, and bring people together.
8
+
9
+ **Our Story**
10
+
11
+ Founded in 2021 by a group of visionary memelords, Pepe Corp began as a small startup focused on creating meme-based pharmaceuticals to counter the growing seriousness in the world. Recognizing the untapped potential of meme culture, our founders set out to explore the intersection between humor, science, and technology.
12
+
13
+ Over the years, Pepe Corp has grown into a multidisciplinary organization with a diverse portfolio of products, ranging from meme-inspired pharmaceuticals to advanced brain implants and mobile applications. Our products are designed to elevate the human experience by harnessing the power of memes, laughter, and the Pepe spirit.
14
+
15
+ **Our Values**
16
+
17
+ At Pepe Corp, our values drive everything we do:
18
+
19
+ 1. **Innovation**: We are committed to pushing the boundaries of meme science and technology to develop products that challenge conventional wisdom and redefine the limits of possibility.
20
+ 2. **Laughter**: We believe that laughter is a universal language that transcends barriers and brings people together. Our products are designed to elicit joy and promote the healing power of humor.
21
+ 3. **Community**: We strive to create a global community of meme enthusiasts who share our passion for innovation, laughter, and the Pepe spirit.
22
+ 4. **Sustainability**: We are dedicated to operating our business in a responsible and sustainable manner, ensuring the continued availability of dank memes for future generations.
23
+
24
+ **Our Products**
25
+
26
+ Pepe Corp's diverse product portfolio includes:
27
+
28
+ 1. *Meme-inspired Pharmaceuticals*: Our groundbreaking pharmaceuticals blend meme culture with scientific research to provide unique and entertaining experiences for our customers.
29
+ 2. *Cutting-Edge Devices*: From brain implants that enable meme-based communication to wearable technology that tracks your meme usage, our devices push the limits of what's possible.
30
+ 3. *Mobile Applications*: Our multi-platform mobile apps, such as PepeTrak and MemeGen, offer innovative solutions to enhance mental health, monitor meme consumption, and generate personalized meme experiences.
31
+
32
+ **Our Future**
33
+
34
+ As we look to the future, Pepe Corp is dedicated to exploring new frontiers in meme science and technology. We will continue to develop innovative products that entertain, inspire, and bring people together. By harnessing the power of memes and the Pepe spirit, we aim to make the world a better, funnier place for all.
35
+
36
+ Join us on this incredible journey as we redefine the limits of possibility and laughter with Pepe Corp, where the future is dank!
app/api/data/training_data/org-board_of_directors.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Board of Directors
2
+
3
+ Title: Board of Directors
4
+
5
+ **Introduction**
6
+
7
+ At Pepe Corp, our Board of Directors is composed of accomplished leaders with diverse backgrounds and expertise in various fields. They share our passion for innovation, laughter, and the Pepe spirit, and are committed to guiding our company's strategic vision and overseeing its growth. Together, our Board of Directors ensures that Pepe Corp remains at the forefront of meme science and technology, making the world a better, funnier place.
8
+
9
+ **Board Members**
10
+
11
+ 1. **Felix Memelord**: Chairman of the Board and Co-Founder
12
+ Felix is a pioneering memelord with over a decade of experience in meme culture and internet humor. As the co-founder of Pepe Corp, he has been instrumental in shaping the company's strategic direction and ensuring its commitment to innovation, laughter, and the Pepe spirit.
13
+ 2. **Dr. Olivia Dankstein**: Vice-Chair and Chief Scientific Officer
14
+ Dr. Dankstein is a renowned scientist with extensive experience in neuroscience, psychology, and the science of humor. She is responsible for leading Pepe Corp's research and development efforts, driving the company's pursuit of groundbreaking meme-inspired pharmaceuticals and technologies.
15
+ 3. **Mona Laughsalot**: Director and Chief Marketing Officer
16
+ Mona is a marketing expert with a background in digital media and consumer engagement. She has been instrumental in building Pepe Corp's global brand presence and creating memorable campaigns that showcase the company's innovative products and commitment to laughter.
17
+ 4. **Henry Frogman**: Director and Chief Financial Officer
18
+ Henry is a seasoned finance executive with experience in corporate strategy and financial management. He oversees Pepe Corp's financial operations, ensuring the company's long-term sustainability and supporting its growth objectives.
19
+ 5. **Iris Memequeen**: Director and Head of Human Resources
20
+ Iris is a human resources professional with a passion for fostering inclusive and supportive workplace environments. She is responsible for developing and implementing Pepe Corp's talent management strategies, cultivating a diverse and empowered team that embodies the Pepe spirit.
21
+ 6. **Jackie Jokster**: Director and Head of Product Development
22
+ Jackie is an accomplished product developer with expertise in software engineering and user experience design. She leads Pepe Corp's product development efforts, ensuring that the company's offerings remain innovative, entertaining, and user-friendly.
23
+ 7. **Vincent Laugherino**: Director and General Counsel
24
+ Vincent is an experienced attorney with a background in intellectual property, corporate law, and regulatory compliance. He advises Pepe Corp's management team and Board of Directors on legal matters, ensuring that the company operates in accordance with applicable laws and regulations.
25
+
26
+ **Conclusion**
27
+
28
+ The Pepe Corp Board of Directors is a dynamic team of leaders who are dedicated to guiding the company's strategic vision and overseeing its growth. By leveraging their diverse expertise and shared passion for innovation, laughter, and the Pepe spirit, our Board of Directors ensures that Pepe Corp remains a global leader in meme science and technology.
app/api/data/training_data/org-company_story.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Company Story
2
+
3
+ Title: Company Story
4
+
5
+ **The Genesis of Pepe Corp**
6
+
7
+ In the early 21st century, a group of visionary memelords gathered in a dimly lit basement, fueled by their passion for dank memes and the iconic Pepe the Frog. As they contemplated the growing seriousness of the world, they were struck by a sudden realization: the power of memes could be harnessed to create groundbreaking products that would not only entertain but also bring people together. And so, in 2021, Pepe Corp was born.
8
+
9
+ **The Early Days: Pioneering Meme-Based Pharmaceuticals**
10
+
11
+ The founders of Pepe Corp were determined to explore the untapped potential of meme culture. They began by creating a range of meme-inspired pharmaceuticals, blending cutting-edge scientific research with the universal language of laughter. These innovative products quickly gained a cult following, garnering both amusement and curiosity.
12
+
13
+ **Expanding Horizons: Meme-Driven Technology**
14
+
15
+ As Pepe Corp's reputation grew, so did its ambitions. The company expanded its focus to include the development of advanced technology inspired by meme culture. From brain implants that enabled meme-based communication to wearable devices that monitored meme consumption, Pepe Corp's inventions captured the imagination of people worldwide.
16
+
17
+ **Becoming a Global Meme Phenomenon**
18
+
19
+ Pepe Corp's rapid rise to prominence caught the attention of mainstream media, turning the once-underground company into a global meme phenomenon. Their unique blend of humor, science, and technology resonated with audiences across generations and cultures, establishing Pepe Corp as a symbol of innovation and laughter.
20
+
21
+ **Building a Meme Empire**
22
+
23
+ Today, Pepe Corp is a multidisciplinary organization with a diverse portfolio of products and services. Its offerings include meme-inspired pharmaceuticals, cutting-edge devices, and multi-platform mobile applications designed to enhance mental health, monitor meme consumption, and generate personalized meme experiences.
24
+
25
+ Pepe Corp's success is built on its unwavering commitment to innovation, laughter, and community. The company continues to push the boundaries of what's possible in the realm of meme science and technology, with the ultimate goal of making the world a better, funnier place.
26
+
27
+ **The Future: Redefining Possibility and Laughter**
28
+
29
+ As Pepe Corp looks to the future, it remains dedicated to exploring new frontiers in meme science and technology. The company's research and development teams are hard at work, creating the next generation of meme-based products that will entertain, inspire, and bring people together.
30
+
31
+ Pepe Corp's journey has only just begun, but its impact is already being felt around the world. As the company continues to redefine the limits of possibility and laughter, one thing is certain: the future is dank, and Pepe Corp is leading the way.
app/api/data/training_data/org-corporate_philosophy.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Corporate philosophy
2
+
3
+ Title: Corporate Philosophy
4
+
5
+ **Introduction**
6
+
7
+ At Pepe Corp, our corporate philosophy is rooted in the belief that humor, innovation, and the power of memes can transform the world. We are dedicated to creating groundbreaking products that entertain, amaze, and unite people in laughter. Our philosophy shapes every aspect of our business, guiding our decisions and inspiring our actions.
8
+
9
+ **Our Core Principles**
10
+
11
+ 1. **Innovation**: We believe in pushing the boundaries of meme science and technology to develop products that challenge conventional wisdom and redefine the limits of possibility. Our commitment to innovation drives us to explore the untapped potential of meme culture, turning wild ideas into reality.
12
+ 2. **Laughter**: We recognize that laughter is a universal language that transcends barriers and brings people together. We are dedicated to creating products that elicit joy, promote the healing power of humor, and spread the spirit of Pepe the Frog throughout the world.
13
+ 3. **Community**: We strive to foster a global community of meme enthusiasts who share our passion for innovation, laughter, and the Pepe spirit. By cultivating a supportive and inclusive environment, we empower our team members, customers, and partners to express themselves freely and embrace the power of memes.
14
+ 4. **Sustainability**: We understand that our success depends on the well-being of the world around us. We are committed to operating our business in a responsible and sustainable manner, ensuring the continued availability of dank memes for future generations.
15
+
16
+ **Our Ethos: The Pepe Spirit**
17
+
18
+ The Pepe spirit is at the heart of our corporate philosophy. Inspired by the iconic Pepe the Frog meme, this ethos represents our unwavering commitment to laughter, creativity, and the power of memes. The Pepe spirit guides our actions, influences our decisions, and serves as a constant reminder of our mission to make the world a better, funnier place.
19
+
20
+ **Our Commitment**
21
+
22
+ As a company, we pledge to uphold our corporate philosophy in every aspect of our business. We are committed to:
23
+
24
+ - Continuously exploring new frontiers in meme science and technology
25
+ - Developing innovative products that entertain, inspire, and bring people together
26
+ - Fostering a diverse and inclusive workplace that values creativity, collaboration, and the Pepe spirit
27
+ - Conducting our business with integrity, transparency, and a commitment to sustainability
28
+
29
+ **Conclusion**
30
+
31
+ At Pepe Corp, our corporate philosophy is more than just a set of guiding principles; it is the very essence of who we are as a company. By embracing the power of memes, laughter, and the Pepe spirit, we aim to transform the world and create a brighter, funnier future for all.
app/api/data/training_data/org-customer_support.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Customer support
2
+
3
+ Title: Customer Support
4
+
5
+ **Introduction**
6
+
7
+ Pepe Corp's management team is composed of accomplished professionals who bring their unique expertise and passion for the Pepe spirit to the forefront of our operations. This dedicated team of leaders is committed to fostering an innovative, inclusive, and laughter-driven culture while driving the company's strategic objectives and ensuring its continued success.
8
+
9
+ **Management Team Members**
10
+
11
+ 1. **Amanda Gigglestein**: Chief Executive Officer
12
+ Amanda is an experienced executive with a background in technology and entertainment. As CEO, she is responsible for setting Pepe Corp's strategic direction, overseeing its day-to-day operations, and ensuring that the company remains at the forefront of meme science and technology.
13
+ 2. **Brandon Memeinator**: Chief Operating Officer
14
+ Brandon is a seasoned operations professional with extensive experience in scaling businesses and optimizing processes. As COO, he is responsible for streamlining Pepe Corp's operations, driving efficiency, and supporting the company's growth objectives.
15
+ 3. **Dr. Penelope Pepescience**: Chief Research Officer
16
+ Dr. Pepescience is a leading expert in the field of meme psychology and neuroscience. As CRO, she directs Pepe Corp's research efforts, spearheading groundbreaking studies and collaborations that fuel the company's innovative product pipeline.
17
+ 4. **Claire Froglover**: Chief Creative Officer
18
+ Claire is a talented creative professional with a background in graphic design, branding, and content creation. As CCO, she oversees Pepe Corp's creative direction, ensuring that the company's products, campaigns, and communications reflect the Pepe spirit and resonate with audiences worldwide.
19
+ 5. **Timothy Chuckles**: Chief Technology Officer
20
+ Timothy is an accomplished technology executive with expertise in software development, artificial intelligence, and data analytics. As CTO, he leads Pepe Corp's technology initiatives, developing cutting-edge solutions that enhance the company's offerings and delight its customers.
21
+ 6. **Sophia Memevator**: Chief Customer Officer
22
+ Sophia is a customer experience expert with a passion for creating memorable and engaging interactions. As CCO, she oversees Pepe Corp's customer service, community engagement, and user experience efforts, ensuring that the company's customers remain at the heart of everything it does.
23
+ 7. **Edward Laughton**: Chief Sustainability Officer
24
+ Edward is a sustainability professional with a background in environmental management and corporate social responsibility. As CSO, he is responsible for developing and implementing Pepe Corp's sustainability strategy, ensuring the company's operations are conducted responsibly and with minimal environmental impact.
25
+
26
+ **Conclusion**
27
+
28
+ Pepe Corp's management team is a dynamic group of leaders who are dedicated to upholding the company's core values of innovation, laughter, and the Pepe spirit. By leveraging their diverse expertise and working collaboratively, our management team ensures that Pepe Corp remains a global leader in meme science and technology, making the world a better, funnier place.
app/api/data/training_data/org-earnings_fy2023.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Earnings FY2023
2
+
3
+ Title: Earnings FY2023
4
+
5
+ **Introduction**
6
+
7
+ As a leading company in meme science and technology, we are committed to continuing our mission of making the world a better, funnier place. In FY2023, we have focused on product innovation, strategic partnerships, and strengthening our global presence, while maintaining our commitment to the Pepe spirit and environmental sustainability.
8
+
9
+ **Strategic Objectives**
10
+
11
+ 1. **Product Innovation**: We will invest in research and development to create cutting-edge products that address the needs of our diverse customer base. Our R&D team will explore new areas of meme science and technology, developing products that improve mental health, communication, and entertainment experiences.
12
+ 2. **Market Expansion**: In FY2023, we plan to expand our presence in key international markets, targeting regions with significant growth potential. Our marketing team will develop tailored campaigns to raise awareness of our brand and products, while our sales team will work on securing new distribution partnerships.
13
+ 3. **Strategic Partnerships**: We will pursue strategic partnerships with leading companies in the technology, entertainment, and pharmaceutical industries, leveraging their expertise and resources to enhance our product offerings and reach new audiences.
14
+ 4. **Environmental Sustainability**: Pepe Corp is committed to operating responsibly and minimizing our environmental impact. In FY2023, we will work on implementing new sustainability initiatives and improving existing ones, focusing on areas such as waste reduction, energy efficiency, and responsible sourcing.
15
+ 5. **Talent Development**: Our employees are our greatest asset, and we will continue to invest in their growth and development in FY2023. We will offer comprehensive training programs, provide opportunities for career advancement, and maintain an inclusive and supportive work environment that embodies the Pepe spirit.
16
+ 6. **Customer Support Excellence**: We will strive to enhance our customer support experience, ensuring that our customers receive timely and effective assistance with their inquiries and concerns. Our Customer Support team will undergo additional training, and we will explore new technologies and processes to improve response times and customer satisfaction.
17
+
18
+ **Quarterly earnings:**
19
+
20
+ Pepe Corp is pleased to present our earnings report for Fiscal Year 2023. The following document outlines our quarterly organizational performance data and provides a simple but believable Profit and Loss (P&L) statement for the year. We are proud to report strong growth in revenue, driven by product innovation, market expansion, and strategic partnerships.
21
+
22
+ **Quarterly Organizational Performance Data**
23
+
24
+ *Note: All figures are in millions (USD)*
25
+
26
+ | Quarter | Revenue | Cost of Goods Sold (COGS) | Gross Profit | Operating Expenses | Operating Income | Net Income |
27
+ | --- | --- | --- | --- | --- | --- | --- |
28
+ | Q1 FY2023 | 50 | 20 | 30 | 15 | 15 | 12 |
29
+ | Q2 FY2023 | 55 | 22 | 33 | 16 | 17 | 13.5 |
30
+ | Q3 FY2023 | 60 | 24 | 36 | 17 | 19 | 15 |
31
+ | Q4 FY2023 | 70 | 28 | 42 | 18 | 24 | 19 |
32
+ | Total FY2023 | 235 | 94 | 141 | 66 | 75 | 59.5 |
33
+
34
+ **Profit & Loss Statement for FY2023**
35
+
36
+ *Note: All figures are in millions (USD)*
37
+
38
+ 1. Revenue: $235
39
+ 2. Cost of Goods Sold (COGS): $94
40
+ 3. Gross Profit: $141
41
+ 4. Operating Expenses: $66
42
+ a. Research & Development: $20
43
+ b. Sales & Marketing: $30
44
+ c. General & Administrative: $16
45
+ 5. Operating Income: $75
46
+ 6. Interest Expense: $3
47
+ 7. Taxes: $12.5
48
+ 8. Net Income: $59.5
49
+
50
+ Pepe Corp's performance in FY2023 reflects strong growth and continued success in our mission to make the world a better, funnier place. Our revenue has increased consistently throughout the year, and our strategic initiatives have resulted in improved operating income and net income. As we continue to innovate, expand, and strengthen our global presence, we are excited about the opportunities that lie ahead and are committed to delivering value to our shareholders and customers alike.
51
+
52
+ **Financial Projections**
53
+
54
+ In FY2024, we expect to achieve strong revenue growth driven by product innovation, market expansion, and strategic partnerships. We will continue to invest in R&D and marketing initiatives to support our growth objectives while maintaining a focus on cost optimization and operational efficiency.
55
+
56
+ **Conclusion**
57
+
58
+ Pepe Corp is excited about the opportunities that lie ahead in FY2024. We remain committed to our mission of making the world a better, funnier place, and we are confident that our strategic objectives will drive growth, innovation, and success in the upcoming fiscal year. Together, we will continue to spread the Pepe spirit and make a lasting impact on our customers and the world.
app/api/data/training_data/org-management_team.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Management team
2
+
3
+ Title: Management Team
4
+
5
+ **Introduction**
6
+
7
+ Pepe Corp's management team is composed of accomplished professionals who bring their unique expertise and passion for the Pepe spirit to the forefront of our operations. This dedicated team of leaders is committed to fostering an innovative, inclusive, and laughter-driven culture while driving the company's strategic objectives and ensuring its continued success.
8
+
9
+ **Management Team Members**
10
+
11
+ 1. **Amanda Gigglestein**: Chief Executive Officer
12
+ Amanda is an experienced executive with a background in technology and entertainment. As CEO, she is responsible for setting Pepe Corp's strategic direction, overseeing its day-to-day operations, and ensuring that the company remains at the forefront of meme science and technology.
13
+ 2. **Brandon Memeinator**: Chief Operating Officer
14
+ Brandon is a seasoned operations professional with extensive experience in scaling businesses and optimizing processes. As COO, he is responsible for streamlining Pepe Corp's operations, driving efficiency, and supporting the company's growth objectives.
15
+ 3. **Dr. Penelope Pepescience**: Chief Research Officer
16
+ Dr. Pepescience is a leading expert in the field of meme psychology and neuroscience. As CRO, she directs Pepe Corp's research efforts, spearheading groundbreaking studies and collaborations that fuel the company's innovative product pipeline.
17
+ 4. **Claire Froglover**: Chief Creative Officer
18
+ Claire is a talented creative professional with a background in graphic design, branding, and content creation. As CCO, she oversees Pepe Corp's creative direction, ensuring that the company's products, campaigns, and communications reflect the Pepe spirit and resonate with audiences worldwide.
19
+ 5. **Timothy Chuckles**: Chief Technology Officer
20
+ Timothy is an accomplished technology executive with expertise in software development, artificial intelligence, and data analytics. As CTO, he leads Pepe Corp's technology initiatives, developing cutting-edge solutions that enhance the company's offerings and delight its customers.
21
+ 6. **Sophia Memevator**: Chief Customer Officer
22
+ Sophia is a customer experience expert with a passion for creating memorable and engaging interactions. As CCO, she oversees Pepe Corp's customer service, community engagement, and user experience efforts, ensuring that the company's customers remain at the heart of everything it does.
23
+ 7. **Edward Laughton**: Chief Sustainability Officer
24
+ Edward is a sustainability professional with a background in environmental management and corporate social responsibility. As CSO, he is responsible for developing and implementing Pepe Corp's sustainability strategy, ensuring the company's operations are conducted responsibly and with minimal environmental impact.
25
+
26
+ **Conclusion**
27
+
28
+ Pepe Corp's management team is a dynamic group of leaders who are dedicated to upholding the company's core values of innovation, laughter, and the Pepe spirit. By leveraging their diverse expertise and working collaboratively, our management team ensures that Pepe Corp remains a global leader in meme science and technology, making the world a better, funnier place.
app/api/data/training_data/project-frogonil.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Frogonil
2
+
3
+ Product Name: Frogonil
4
+
5
+ Purpose: Increases capacity to shitpost on social media
6
+
7
+ **How to Use**
8
+
9
+ Frogonil is available in the form of concentrated Pepe-powered capsules. The recommended dosage is one capsule per day, taken orally with a glass of water and a side of your preferred social media platform. For maximum shitposting potential, take Frogonil 15 minutes prior to engaging in online discussions or while crafting your next witty response to a controversial tweet.
10
+
11
+ **Side Effects**
12
+
13
+ Some potential side effects of Frogonil may include:
14
+
15
+ 1. An insatiable appetite for internet trolling and stirring up online chaos
16
+ 2. Unexpectedly profound insights into meme culture
17
+ 3. An increased propensity for using the term "REEE" in everyday conversations
18
+ 4. A heightened sense of humor and the ability to find irony in even the darkest situations
19
+ 5. Periodic visions of Pepe as a spirit guide, leading you on a quest for the dankest memes
20
+
21
+ Please consult your memologist if side effects persist or become bothersome.
22
+
23
+ **Precautions**
24
+
25
+ Before taking Frogonil, consider the following precautions:
26
+
27
+ 1. Do not use Frogonil if you have a history of being a buzzkill or an intolerance for internet humor.
28
+ 2. Frogonil may not be suitable for individuals who prefer a quiet, peaceful online existence.
29
+ 3. Exercise caution when engaging in real-life conversations, as Frogonil may cause users to inadvertently apply shitposting tactics in inappropriate situations.
30
+
31
+ **Interactions**
32
+
33
+ Frogonil may interact with other substances or activities, including:
34
+
35
+ 1. Sarcasm inhibitors: Combining Frogonil with sarcasm inhibitors may lead to unpredictable humor fluctuations and an inability to detect irony.
36
+ 2. Internet detox: Frogonil is not recommended for those engaging in an internet detox, as the temptation to shitpost may become overwhelming.
37
+
38
+ Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Frogonil's effectiveness.
39
+
40
+ **Overdose**
41
+
42
+ In case of an overdose, symptoms may include:
43
+
44
+ 1. Unstoppable shitposting, potentially leading to the creation of alternative online personas
45
+ 2. A warped perception of reality where memes become the ultimate truth
46
+ 3. Gaining an unsettling number of followers and admirers who appreciate your dark humor
47
+
48
+ If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, Frogonil is best enjoyed in moderation, and always heed the guidance of your Pepe-inspired spirit guide: "REEE-sponsibly."
app/api/data/training_data/project-kekzal.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Kekzal
2
+
3
+ Product Name: Kekzal
4
+
5
+ Purpose: Enhances sexual function
6
+
7
+ **How to Use**
8
+
9
+ Kekzal is available in the form of Pepe-approved, green diamond-shaped tablets. The recommended dosage is one tablet taken orally with a glass of water, approximately 60 minutes before engaging in intimate activities. For optimal results, immerse yourself in Pepe memes and share a laugh with your partner to set the mood.
10
+
11
+ **Side Effects**
12
+
13
+ Some potential side effects of Kekzal may include:
14
+
15
+ 1. An increased propensity for whispering sweet Pepe nothings into your partner's ear
16
+ 2. A heightened sense of humor, making even the most awkward intimate moments hilarious
17
+ 3. The occasional urge to shout "KEK!" during moments of passion
18
+ 4. Spontaneous and uncontrollable laughter during foreplay
19
+ 5. An unexplained affinity for green lingerie and bedroom accessories
20
+
21
+ Consult your memologist if side effects persist or become bothersome.
22
+
23
+ **Precautions**
24
+
25
+ Before taking Kekzal, consider the following precautions:
26
+
27
+ 1. Do not use Kekzal if you have a known allergy to dank memes or an aversion to Pepe-themed romantic encounters.
28
+ 2. Kekzal may not be suitable for individuals with a history of humor-related intimacy issues or meme-related performance anxiety.
29
+ 3. Exercise caution when engaging in intimate activities, as Kekzal may cause fits of laughter at inappropriate moments.
30
+
31
+ **Interactions**
32
+
33
+ Kekzal may interact with other substances or activities, including:
34
+
35
+ 1. Seriousness supplements: Combining Kekzal with seriousness supplements may result in a decreased sense of humor and diminished intimate enjoyment.
36
+ 2. Nostalgic media: Watching reruns of classic sitcoms or engaging in non-Pepe related activities may reduce Kekzal's effectiveness.
37
+
38
+ Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Kekzal's intended effects.
39
+
40
+ **Overdose**
41
+
42
+ In case of an overdose, symptoms may include:
43
+
44
+ 1. An insatiable desire to create erotic Pepe fanfiction
45
+ 2. The belief that Pepe is the ultimate symbol of love and intimacy
46
+ 3. An uncontrollable urge to redecorate your bedroom with Pepe-themed decor
47
+
48
+
49
+
50
+ If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, the key to enjoying Kekzal is to embrace its humor-enhancing effects responsibly, and to appreciate the amorous wisdom of our legendary Pepe: "Love is a meme, my friend."
app/api/data/training_data/project-memegen.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MemeGen
2
+
3
+ Product Name: MemeGen
4
+
5
+ Purpose: A meme generator mental health application generating better outcomes for patients
6
+
7
+ **About the App**
8
+
9
+ MemeGen is an innovative multi-platform mobile app designed to boost mental health through the healing power of memes. By utilizing Pepe Corp's cutting-edge meme generation technology, MemeGen crafts personalized and hilarious memes to suit your emotional needs, providing a unique therapeutic experience. Rediscover the joy of laughter and embrace the dank meme revolution with MemeGen.
10
+
11
+ **Features**
12
+
13
+ 1. Personalized Meme Generation: MemeGen's advanced algorithms create custom memes based on your mood, preferences, and sense of humor to provide targeted mental health support.
14
+ 2. Meme Therapy: Receive daily meme prescriptions tailored to your emotional needs, helping you navigate life's ups and downs with a healthy dose of laughter.
15
+ 3. Mood Tracker: Monitor your mood and emotional progress with MemeGen's built-in mood tracker, offering valuable insights into your mental health journey.
16
+ 4. Meme Community: Connect with fellow MemeGen users to share and discuss your favorite therapeutic memes, creating a supportive and laughter-filled environment.
17
+ 5. Mental Health Resources: Access a library of mental health resources and tips, all infused with MemeGen's signature Pepe-inspired humor.
18
+
19
+ **Troubleshooting**
20
+
21
+ 1. If MemeGen fails to load or crashes unexpectedly, try restarting your device and ensuring your app version is up-to-date.
22
+ 2. If MemeGen's personalized memes seem off-target, double-check your preferences and mood inputs in the app settings.
23
+ 3. For any other technical issues or concerns, contact MemeGen's dedicated support team at [support@memegen.com](mailto:support@memegen.com).
24
+
25
+ **FAQ**
26
+
27
+ 1. *Is MemeGen compatible with my device?*
28
+ MemeGen is available for both iOS and Android devices, ensuring that laughter is always within reach.
29
+ 2. *How does MemeGen maintain my privacy?*
30
+ MemeGen values user privacy and employs advanced encryption methods to protect your personal information and meme preferences. For more information, refer to our Privacy Policy.
31
+ 3. *Can MemeGen replace professional mental health care?*
32
+ While MemeGen is designed to provide a fun and supportive mental health experience, it is not a substitute for professional mental health care. Always consult a qualified healthcare professional for serious mental health concerns.
33
+ 4. *Are the memes generated by MemeGen safe for all audiences?*
34
+ MemeGen aims to provide a positive and inclusive experience, creating memes that cater to a wide range of humor preferences while avoiding offensive or harmful content.
35
+
36
+ Dive into the world of therapeutic meme generation with MemeGen and let the power of laughter and Pepe-inspired wisdom bring light to your mental health journey.
app/api/data/training_data/project-memetrex.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Memetrex
2
+
3
+ Product Name: Memetrex
4
+
5
+ Purpose: Increases memory, causing you to literally remember everything
6
+
7
+ **How to Use**
8
+
9
+ Memetrex is available in the form of Pepe-embossed, green elliptical tablets. The recommended dosage is one tablet taken orally with a glass of water, preferably while browsing your favorite meme database for mental stimulation. For optimal results, take Memetrex 30 minutes before engaging in memory-intensive activities, such as trivia night or an epic meme showdown.
10
+
11
+ **Side Effects**
12
+
13
+ Some potential side effects of Memetrex may include:
14
+
15
+ 1. The sudden ability to recall every meme you've ever encountered, even the cringiest ones
16
+ 2. An uncanny knack for winning online arguments by recalling obscure Pepe references
17
+ 3. A newfound obsession with pop culture history, particularly as it relates to memes
18
+ 4. The inability to forget embarrassing moments, both your own and those of others
19
+ 5. The occasional urge to recite meme-related trivia at inappropriate moments
20
+
21
+ Consult your memologist if side effects persist or become bothersome.
22
+
23
+ **Precautions**
24
+
25
+ Before taking Memetrex, consider the following precautions:
26
+
27
+ 1. Do not use Memetrex if you have a known allergy to dank memes or a history of meme-induced flashbacks.
28
+ 2. Memetrex may not be suitable for individuals with a propensity for meme-related nightmares or those who wish to forget their past meme blunders.
29
+ 3. Exercise caution when engaging in social situations, as Memetrex may cause users to inadvertently recall and share embarrassing memories.
30
+
31
+ **Interactions**
32
+
33
+ Memetrex may interact with other substances or activities, including:
34
+
35
+ 1. Memory suppressants: Combining Memetrex with memory suppressants may result in an epic meme battle for mental dominance and unpredictable memory performance.
36
+ 2. Binge-watching TV shows: The combination of Memetrex and binge-watching may cause an overload of pop culture references, leading to an irresistible urge to create crossover memes.
37
+
38
+ Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Memetrex's effectiveness.
39
+
40
+ **Overdose**
41
+
42
+ In case of an overdose, symptoms may include:
43
+
44
+ 1. A photographic memory of every meme ever created, resulting in an overwhelming desire to create a comprehensive meme encyclopedia
45
+ 2. The belief that you are the ultimate meme archivist, destined to preserve dank memes for future generations
46
+ 3. The inability to separate meme memories from your own life experiences
47
+
48
+ If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, the key to enjoying Memetrex is to use it responsibly and to cherish the unparalleled meme wisdom of our legendary Pepe: "With great meme-ory comes great responsibility."
app/api/data/training_data/project-neurokek.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Neuro-kek
2
+
3
+ Product Name: Neuro-Kek
4
+
5
+ Purpose: A brain implant that causes the patient to communicate only in memes
6
+
7
+ **About the Devices**
8
+
9
+ Neuro-Kek is a revolutionary brain implant developed by Pepe Corp, designed to transform the way you communicate by converting your thoughts into dank memes. By tapping into the brain's meme center, Neuro-Kek enables users to express themselves solely through the language of Pepe and other popular memes. Say goodbye to mundane conversations and embrace the next frontier of human communication with Neuro-Kek.
10
+
11
+ **Side-Effects and Warnings**
12
+
13
+ Potential side effects and warnings associated with the Neuro-Kek implant include:
14
+
15
+ 1. Temporary or permanent loss of the ability to communicate in any form other than memes
16
+ 2. Increased likelihood of being mistaken for a professional memelord
17
+ 3. Sudden and uncontrollable cravings for rare Pepe collectibles
18
+ 4. Inability to understand or participate in non-meme related conversations
19
+ 5. The risk of becoming an internet sensation due to your unparalleled meme fluency
20
+
21
+ Please consult a certified memologist before undergoing the Neuro-Kek implant procedure.
22
+
23
+ **Device Features**
24
+
25
+ 1. Instant Meme Generation: Neuro-Kek translates your thoughts into memes in real-time, ensuring you're always ready to deliver the perfect meme-based response.
26
+ 2. Meme Mastery: Gain access to an extensive database of classic and trending memes, enhancing your meme fluency and communication skills.
27
+ 3. Meme-to-Speech: Neuro-Kek's built-in meme-to-speech converter allows you to vocally communicate in meme format for seamless meme-based conversations.
28
+ 4. Meme Analytics: Monitor your meme usage and trends with Neuro-Kek's meme analytics feature, helping you refine your meme communication strategies.
29
+ 5. Meme Updates: Receive regular meme updates to keep your meme repertoire fresh and relevant in the ever-evolving meme landscape.
30
+
31
+ **Device Specifications**
32
+
33
+ 1. Implant Material: Biocompatible Pepe-green nanomaterials
34
+ 2. Power Source: Memergy - a revolutionary energy source derived from the user's innate meme enthusiasm
35
+ 3. Connectivity: DankNet - a secure, meme-based communication network
36
+ 4. Dimensions: 4.20 x 6.9 x 0.69 mm
37
+ 5. Compatibility: Human brains with a dank sense of humor
38
+
39
+ **Troubleshooting**
40
+
41
+ 1. If Neuro-Kek fails to generate memes, try immersing yourself in meme culture to replenish your memergy reserves.
42
+ 2. If Neuro-Kek malfunctions or produces irrelevant memes, consult your memologist for a device recalibration.
43
+ 3. For any other technical issues or concerns, contact Pepe Corp's dedicated support team at [support@neuro-kek.com](mailto:support@neuro-kek.com).
44
+
45
+ **FAQ**
46
+
47
+ 1. *Is the Neuro-Kek implant procedure safe?*
48
+ The Neuro-Kek implant procedure is performed by certified memologists and follows strict safety guidelines to ensure a positive meme-enhancing experience.
49
+ 2. *Can Neuro-Kek be removed or turned off?*
50
+ Neuro-Kek can be temporarily deactivated or removed by a certified memologist if the user wishes to revert to traditional communication methods.
51
+ 3. *Does Neuro-Kek work with languages other than English?*
52
+ Absolutely! Neuro-Kek is designed to generate memes in multiple languages, ensuring that meme-based communication knows no boundaries.
53
+ 4. *Is Neuro-Kek compatible with other brain implants or devices?*
54
+ Neuro-Kek's compatibility with other brain implants or devices may vary. Consult your memologist to discuss potential interactions.
55
+
56
+ Embrace the future of meme-based communication with the Neuro-Kek brain implant and let Pepe's wisdom guide you through the fascinating world of dank memes.
app/api/data/training_data/project-pepetamine.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pepetamine
2
+
3
+ Product Name: Pepetamine
4
+
5
+ Purpose: Increases cognitive focus just like the Limitless movie
6
+
7
+ **How to Use**
8
+
9
+ Pepetamine is available in the form of rare Pepe-coated tablets. The recommended dosage is one tablet per day, taken orally with a glass of water, preferably while browsing your favorite meme forum for maximum cognitive enhancement. For optimal results, take Pepetamine 30 minutes before engaging in mentally demanding tasks, such as decoding ancient Pepe hieroglyphics or creating your next viral meme masterpiece.
10
+
11
+ **Side Effects**
12
+
13
+ Some potential side effects of Pepetamine may include:
14
+
15
+ 1. Uncontrollable laughter and a sudden appreciation for dank memes
16
+ 2. An inexplicable desire to collect rare Pepes
17
+ 3. Enhanced meme creation skills, potentially leading to internet fame
18
+ 4. Temporary green skin pigmentation, resembling the legendary Pepe himself
19
+ 5. Spontaneously speaking in "feels good man" language
20
+
21
+ While most side effects are generally harmless, consult your memologist if side effects persist or become bothersome.
22
+
23
+ **Precautions**
24
+
25
+ Before taking Pepetamine, please consider the following precautions:
26
+
27
+ 1. Do not use Pepetamine if you have a known allergy to rare Pepes or dank memes.
28
+ 2. Pepetamine may not be suitable for individuals with a history of humor deficiency or meme intolerance.
29
+ 3. Exercise caution when driving or operating heavy machinery, as Pepetamine may cause sudden fits of laughter or intense meme ideation.
30
+
31
+ **Interactions**
32
+
33
+ Pepetamine may interact with other substances, including:
34
+
35
+ 1. Normie supplements: Combining Pepetamine with normie supplements may result in meme conflicts and a decreased sense of humor.
36
+ 2. Caffeine: The combination of Pepetamine and caffeine may cause an overload of energy, resulting in hyperactive meme creation and potential internet overload.
37
+
38
+ Consult your memologist if you are taking any other medications or substances to ensure compatibility with Pepetamine.
39
+
40
+ **Overdose**
41
+
42
+ In case of an overdose, symptoms may include:
43
+
44
+ 1. Uncontrollable meme creation
45
+ 2. Delusions of grandeur as the ultimate meme lord
46
+ 3. Time warps into the world of Pepe
47
+
48
+ If you suspect an overdose, contact your local meme emergency service or visit the nearest meme treatment facility. Remember, the key to enjoying Pepetamine is to use it responsibly, and always keep in mind the wise words of our legendary Pepe: "Feels good man."
app/api/data/training_data/project-pepetrak.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PepeTrak
2
+
3
+ Product Name: PepeTrak
4
+
5
+ Purpose: An award-winning diagnosis app suggesting appropriate Pepe Corp pharmaceutical interventions
6
+
7
+ **About the App**
8
+
9
+ PepeTrak is a groundbreaking multi-platform mobile app designed to help you navigate the wild world of Pepe Corp pharmaceuticals. Using advanced meme-based algorithms, PepeTrak assesses your current mood, meme preferences, and medical history to recommend the perfect Pepe Corp intervention to suit your needs. Say goodbye to aimless meme browsing and hello to a new era of personalized Pepe prescriptions.
10
+
11
+ **Features**
12
+
13
+ 1. Meme Diagnosis: PepeTrak's state-of-the-art meme analysis technology evaluates your recent meme consumption and emotional state to provide a tailored meme diagnosis.
14
+ 2. Pepe Prescription: Based on your meme diagnosis, PepeTrak suggests the most appropriate Pepe Corp pharmaceutical intervention to elevate your meme game.
15
+ 3. Meme Tracking: Keep track of your meme-related progress and improvements with PepeTrak's meme tracking feature.
16
+ 4. Meme Community: Connect with fellow PepeTrak users to share your favorite memes, discuss Pepe Corp pharmaceutical experiences, and spread the dank meme love.
17
+ 5. Pepe Rewards: Earn exclusive Pepe points for using the app, redeemable for rare Pepe collectibles and discounts on Pepe Corp products.
18
+
19
+ **Troubleshooting**
20
+
21
+ 1. If PepeTrak fails to load or crashes unexpectedly, try restarting your device and ensuring your app version is up-to-date.
22
+ 2. If PepeTrak's meme diagnosis seems inaccurate, double-check your meme preferences and personal information in the app settings.
23
+ 3. For any other technical issues or concerns, contact PepeTrak's dedicated support team at [support@pepetrak.com](mailto:support@pepetrak.com).
24
+
25
+ **FAQ**
26
+
27
+ 1. *Is PepeTrak compatible with my device?*
28
+ PepeTrak is available for both iOS and Android devices, ensuring maximum meme accessibility.
29
+ 2. *How does PepeTrak maintain my privacy?*
30
+ PepeTrak values user privacy and employs advanced encryption methods to protect your personal information and meme preferences. For more information, refer to our Privacy Policy.
31
+ 3. *Can I use PepeTrak without a Pepe Corp pharmaceutical prescription?*
32
+ Absolutely! PepeTrak is designed to enhance your overall meme experience and can be used independently of Pepe Corp products.
33
+ 4. *Are the Pepe Corp pharmaceuticals recommended by PepeTrak safe?*
34
+ Pepe Corp pharmaceuticals are designed for entertainment purposes only and should not be taken as actual medical advice. Always consult a professional healthcare provider before starting any new treatment.
35
+
36
+ Embark on a personalized meme journey with PepeTrak and let the spirit of Pepe guide you to dank meme enlightenment!
app/api/helpers.py ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import HTTPException
2
+ from uuid import UUID
3
+ import os
4
+
5
+ from typing import (
6
+ Optional,
7
+ Union
8
+ )
9
+ from config import (
10
+ FILE_UPLOAD_PATH,
11
+ ENTITY_STATUS,
12
+ logger
13
+ )
14
+
15
+ from util import (
16
+ is_uuid,
17
+ get_file_hash
18
+ )
19
+ from sqlmodel import (
20
+ Session,
21
+ select
22
+ )
23
+ from datetime import datetime
24
+ from models import (
25
+ Organization,
26
+ OrganizationCreate,
27
+ User,
28
+ UserCreate,
29
+ get_engine,
30
+ Project,
31
+ ProjectCreate,
32
+ Document,
33
+ Node,
34
+ ChatSession
35
+ )
36
+
37
+ # ================
38
+ # Helper functions
39
+ # ================
40
+
41
+
42
+ # ----------------------
43
+ # Organization functions
44
+ # ----------------------
45
def get_org_by_uuid_or_namespace(
    id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = True
):
    """Look up an Organization by UUID or by namespace string.

    Args:
        id: A UUID (resolved via ``Organization.by_uuid``) or a namespace string.
        session: Optional existing DB session; a temporary one is opened if omitted.
        should_except: When True, raise HTTP 404 if nothing matches.

    Returns:
        The matching Organization, or None (only when ``should_except`` is False).

    Raises:
        HTTPException: 404 when not found and ``should_except`` is True.
    """

    def _find(db: Session):
        # UUIDs use the model helper; anything else is treated as a namespace
        if is_uuid(id):
            return Organization.by_uuid(str(id))
        return db.exec(
            select(Organization).where(Organization.namespace == str(id))
        ).first()

    if session:
        org = _find(session)
    else:
        with Session(get_engine()) as tmp_session:
            org = _find(tmp_session)

    if not org and should_except:
        # typo fix: "identifer" -> "identifier"
        raise HTTPException(
            status_code=404, detail=f"Organization identifier {id} not found"
        )

    return org
73
+
74
+
75
def create_org_by_org_or_uuid(
    namespace: str = None,
    display_name: str = None,
    organization: Union[Organization, OrganizationCreate, str] = None,
    session: Optional[Session] = None,
):
    """Create a new organization and provision its file-upload directory.

    Args:
        namespace: Unique organization namespace; falls back to
            ``organization.namespace`` when an object carrying one is passed.
        display_name: Human-readable organization name.
        organization: Optional Organization / OrganizationCreate instance, or a
            bare namespace string.
        session: Optional existing DB session.

    Returns:
        The persisted Organization.

    Raises:
        HTTPException: 400 when no namespace can be determined; 409 when an
            organization with this namespace already exists.
    """
    # Fall back to the namespace on the passed object; plain strings and None
    # have no .namespace attribute (the original crashed on them here).
    if not namespace:
        namespace = getattr(organization, "namespace", None)

    if not namespace:
        raise HTTPException(
            status_code=400, detail="Organization namespace is required"
        )

    existing = (
        get_org_by_uuid_or_namespace(namespace, session=session, should_except=False)
        if not isinstance(organization, Organization)
        else organization
    )

    if existing:
        # 409 Conflict (not 404) is the correct status for a duplicate resource
        raise HTTPException(status_code=409, detail="Organization already exists")

    # A bare namespace string previously reached Organization.from_orm and
    # failed; normalize it to an OrganizationCreate payload first.
    if isinstance(organization, str):
        organization = OrganizationCreate(
            namespace=namespace, display_name=display_name
        )

    if isinstance(organization, OrganizationCreate):
        db_org = Organization.from_orm(organization)

        if session:
            session.add(db_org)
            session.commit()
            session.refresh(db_org)
        else:
            with Session(get_engine()) as tmp_session:
                tmp_session.add(db_org)
                tmp_session.commit()
                tmp_session.refresh(db_org)
    else:
        # organization is None here: an Organization instance would have been
        # rejected above as a duplicate.
        db_org = Organization.create(
            {"namespace": namespace, "display_name": display_name}
        )

    # exist_ok avoids a crash if the directory was already provisioned
    os.makedirs(os.path.join(FILE_UPLOAD_PATH, str(db_org.uuid)), exist_ok=True)

    return db_org
132
+
133
+
134
+ # --------------
135
+ # User functions
136
+ # --------------
137
def create_user(
    user: Union[UserCreate, User] = None,
    identifier: str = None,
    identifier_type: str = None,
    device_fingerprint: str = None,
    first_name: str = None,
    last_name: str = None,
    email: str = None,
    phone: str = None,
    dob: str = None,
    session: Optional[Session] = None,
):
    """Create a user, or update an existing one matched by identifier/UUID.

    Args:
        user: Optional User (updated in place) or UserCreate payload.
        identifier: External identifier used to find an existing user.
        identifier_type, device_fingerprint, first_name, last_name, email,
            phone, dob: Field values; when updating, unset ones keep the
            user's current values.
        session: Optional existing DB session.

    Returns:
        The created or updated User.
    """
    if not isinstance(user, User):
        # Only attempt a lookup when we actually have a key. The original
        # unconditionally evaluated `user.id or identifier` (AttributeError
        # when user was None) and used should_except=True, so creating a
        # genuinely new user raised 404 instead of proceeding.
        lookup_key = identifier or getattr(user, "id", None)
        if lookup_key:
            existing = get_user_by_uuid_or_identifier(
                lookup_key, session=session, should_except=False
            )
            if existing:
                user = existing

    if isinstance(user, UserCreate):
        db_user = User.from_orm(user)

        if session:
            session.add(db_user)
            session.commit()
            session.refresh(db_user)
        else:
            with Session(get_engine()) as tmp_session:
                tmp_session.add(db_user)
                tmp_session.commit()
                tmp_session.refresh(db_user)
    elif isinstance(user, User):
        db_user = user
        # Keep current values for any field not explicitly provided
        db_user.update(
            {
                "identifier": identifier or user.identifier,
                "identifier_type": identifier_type or user.identifier_type,
                "device_fingerprint": device_fingerprint or user.device_fingerprint,
                "first_name": first_name or user.first_name,
                "last_name": last_name or user.last_name,
                "email": email or user.email,
                "phone": phone or user.phone,
                "dob": dob or user.dob,
            }
        )
    else:
        db_user = User.create(
            {
                "identifier": identifier,
                "identifier_type": identifier_type,
                "device_fingerprint": device_fingerprint,
                "first_name": first_name,
                "last_name": last_name,
                "email": email,
                "phone": phone,
                "dob": dob,
            }
        )

    return db_user
201
+
202
+
203
def get_users(session: Optional[Session] = None):
    """Return every User row in the database.

    Args:
        session: Optional existing DB session; a temporary one is used otherwise.
    """
    if session is not None:
        return session.exec(select(User)).all()

    with Session(get_engine()) as db:
        return db.exec(select(User)).all()
211
+
212
+
213
def get_user_by_uuid_or_identifier(
    id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = True
):
    """Look up a User by UUID or by external identifier string.

    Args:
        id: A UUID (resolved via ``User.by_uuid``) or an identifier string.
        session: Optional existing DB session.
        should_except: When True, raise HTTP 404 if nothing matches.

    Returns:
        The matching User, or None (only when ``should_except`` is False).

    Raises:
        HTTPException: 404 when not found and ``should_except`` is True.
    """

    def _find(db: Session):
        # UUIDs use the model helper; anything else is matched on identifier
        if is_uuid(str(id)):
            return User.by_uuid(str(id))
        return db.exec(select(User).where(User.identifier == str(id))).first()

    if session:
        user = _find(session)
    else:
        with Session(get_engine()) as tmp_session:
            user = _find(tmp_session)

    if not user and should_except:
        # typo fix: "identifer" -> "identifier"
        raise HTTPException(status_code=404, detail=f"User identifier {id} not found")

    return user
237
+
238
+
239
+ # ------------------
240
+ # Document functions
241
+ # ------------------
242
def create_document_by_file_path(
    organization: Organization = None,
    project: Project = None,
    file_path: str = None,
    url: Optional[str] = None,
    file_version: Optional[int] = 1,
    file_hash: Optional[str] = None,
    overwrite: Optional[bool] = True,
    session: Optional[Session] = None,
):
    """Create a Document row (plus its embedding nodes) from a file on disk.

    Fixes vs. the original:
      * the file handle is closed via a context manager
      * when an active document of the same name exists and ``overwrite`` is
        True, the old row is deprecated AND a new version-bumped row is
        created (previously ``file_version`` was computed but the new row was
        never written)
      * the created Document is returned to the caller

    Args:
        organization: Owning Organization (required).
        project: Owning Project (required).
        file_path: Path to an existing file to ingest (required).
        url: Optional source URL stored on the document.
        file_version: Initial version when no prior document exists.
        file_hash: Optional precomputed sha256; computed when omitted.
        overwrite: Deprecate a same-named active document and bump the version.
        session: Optional existing DB session.

    Raises:
        HTTPException: 400 on missing/invalid arguments, 409 on duplicate
            content hash.
    """
    if not organization or not project:
        raise HTTPException(
            status_code=400, detail="Organization and project are required"
        )

    organization_id = organization.uuid
    project_id = project.uuid

    if not file_path or not os.path.exists(file_path):
        raise HTTPException(status_code=400, detail="A valid file path is required")

    if not file_hash:
        file_hash = get_file_hash(file_path)

    file_name = os.path.basename(file_path)

    # Context manager so the handle is always closed (original leaked it)
    with open(file_path, "rb") as fh:
        file_contents = fh.read()

    # ------------------------
    # Handle duplicate content
    # ------------------------
    if get_document_by_hash(file_hash, session=session):
        raise HTTPException(
            status_code=409,
            detail=f'Document "{file_name}" already uploaded! \n\nsha256:{file_hash}!',
        )

    # ----------------------------------
    # Handle file versioning by filename
    # ----------------------------------
    previous = get_document_by_name(
        file_name,
        project_id=project_id,
        organization_id=organization_id,
        session=session,
    )

    if previous and overwrite:
        # Deprecate the active document and bump the version for the new row
        file_version = previous.version + 1
        previous.updated_at = datetime.utcnow()
        previous.status = ENTITY_STATUS.DEPRECATED.value
        previous.save()

    # ---------------------
    # Create a new document
    # ---------------------
    document = Document(
        display_name=file_name,
        project_id=project.id,
        organization_id=organization.id,
        data=file_contents,
        version=file_version,
        hash=file_hash,
        url=url if url else None,
    )

    def _persist(db: Session):
        db.add(document)
        db.commit()
        db.refresh(document)
        # Chunk + embed the document contents into Node rows
        create_document_nodes(
            document=document,
            project=project,
            organization=organization,
            session=db,
        )

    if session:
        _persist(session)
    else:
        with Session(get_engine()) as tmp_session:
            _persist(tmp_session)

    if not document:
        raise HTTPException(status_code=400, detail="Could not create document")

    return document
342
+
343
+
344
+ # --------------------------
345
+ # Create document embeddings
346
+ # --------------------------
347
def create_document_nodes(
    document: Document,
    project: Project,
    organization: Organization,
    session: Optional[Session] = None,
):
    """Embed a document's contents and persist one Node row per chunk.

    Args:
        document: The persisted Document whose data will be embedded.
        project: Owning Project (its UUID is denormalized onto each node).
        organization: Owning Organization (UUID denormalized onto each node).
        session: Optional existing DB session; opened once when omitted.

    Raises:
        Exception: when document or project is missing.
    """
    # Imported here to avoid a circular import with llm.py
    from llm import get_embeddings, get_token_count

    if not document or not project:
        raise Exception("Missing required parameters document, project")

    # Denormalized metadata stored on every node for fast filtering
    metadata = {
        "project_uuid": str(project.uuid),
        "document_uuid": str(document.uuid),
        "organization_uuid": str(organization.uuid),
        "document_id": document.id,
        "version": document.version,
        "name": document.display_name,
    }

    # Document data may come back from the DB as raw bytes; embedding needs text
    document_data = (
        document.data.decode("utf-8")
        if isinstance(document.data, bytes)
        else document.data
    )

    arr_documents, embeddings = get_embeddings(document_data)

    # -------------------------------------------
    # Process the embeddings and save to database
    # -------------------------------------------
    def _persist_nodes(db: Session):
        # One Node row per (text chunk, embedding vector) pair
        for chunk, vector in zip(arr_documents, embeddings):
            node = Node(
                document_id=document.id,
                embeddings=vector,
                text=chunk,
                token_count=get_token_count(chunk),
                meta=metadata,
            )
            db.add(node)
            db.commit()
            db.refresh(node)

    if session:
        _persist_nodes(session)
    else:
        # Open the fallback session ONCE (the original opened a fresh session
        # for every node inside the loop)
        with Session(get_engine()) as tmp_session:
            _persist_nodes(tmp_session)
415
+
416
+
417
def get_documents_by_project_and_org(
    project_id: Union[UUID, str],
    organization_id: Union[UUID, str],
    session: Optional[Session] = None,
):
    """Return all documents belonging to a project within an organization.

    Args:
        project_id: Project UUID.
        organization_id: Organization UUID or namespace.
        session: Optional existing DB session.
    """

    def _query(db: Session):
        # Resolve org first so the project lookup is scoped correctly
        org = get_org_by_uuid_or_namespace(organization_id, session=db)
        project = get_project_by_uuid(project_id, org.uuid, session=db)
        return db.exec(
            select(Document).where(Document.project_id == project.id)
        ).all()

    if session:
        return _query(session)

    with Session(get_engine()) as db:
        return _query(db)
437
+
438
+
439
def get_document_by_uuid(
    uuid: Union[UUID, str],
    organization_id: Union[UUID, str] = None,
    project_id: Union[UUID, str] = None,
    session: Optional[Session] = None,
    should_except: bool = True,
):
    """Fetch a document by UUID, scoped to an organization and project.

    Args:
        uuid: Document UUID (422 if not a valid UUID).
        organization_id: Organization UUID or namespace.
        project_id: Project UUID.
        session: Optional existing DB session.
        should_except: When True, raise 404 if nothing matches.
    """
    if not is_uuid(uuid):
        raise HTTPException(
            status_code=422, detail=f"Invalid document identifier {uuid}"
        )

    org = get_org_by_uuid_or_namespace(organization_id, session=session)
    project = get_project_by_uuid(project_id, organization_id=org.uuid, session=session)

    query = select(Document).where(
        Document.project == project, Document.uuid == str(uuid)
    )

    if session:
        document = session.exec(query).first()
    else:
        with Session(get_engine()) as db:
            document = db.exec(query).first()

    if not document and should_except:
        raise HTTPException(
            status_code=404, detail=f"Document identifier {uuid} not found"
        )

    return document
475
+
476
+
477
def get_document_by_hash(hash: str, session: Optional[Session] = None):
    """Return the document whose content hash matches, or None.

    Args:
        hash: sha256 content hash.
        session: Optional existing DB session.
    """
    query = select(Document).where(Document.hash == hash)

    if session:
        return session.exec(query).first()

    with Session(get_engine()) as db:
        return db.exec(query).first()
487
+
488
+
489
def get_document_by_name(
    file_name: str,
    project_id: Union[UUID, str],
    organization_id: Union[UUID, str],
    session: Optional[Session] = None,
):
    """Return the ACTIVE document with this display name in the given project.

    Args:
        file_name: Document display name (original file name).
        project_id: Project UUID.
        organization_id: Organization UUID/namespace, or an Organization.
        session: Optional existing DB session.

    Returns:
        The active Document, or None.
    """
    org = (
        organization_id
        if isinstance(organization_id, Organization)
        else get_org_by_uuid_or_namespace(organization_id, session=session)
    )
    project = get_project_by_uuid(
        project_id, organization_id=str(org.uuid), session=session
    )

    # Only the currently-active version counts; deprecated rows are ignored
    query = select(Document).where(
        Document.project == project,
        Document.display_name == file_name,
        Document.status == ENTITY_STATUS.ACTIVE.value,
    )

    if session:
        return session.exec(query).first()

    with Session(get_engine()) as db:
        return db.exec(query).first()
521
+
522
+
523
+ # ---------------------
524
+ # ChatSession functions
525
+ # ---------------------
526
def get_chat_session_by_uuid(
    id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = False
):
    """Look up a ChatSession by UUID or by external session_id string.

    Args:
        id: A UUID (resolved via ``ChatSession.by_uuid``) or a session_id string.
        session: Optional existing DB session.
        should_except: When True, raise HTTP 404 if nothing matches.

    Returns:
        The matching ChatSession, or None.
    """

    def _find(db: Session):
        # UUIDs use the model helper; anything else is matched on session_id
        if is_uuid(id):
            return ChatSession.by_uuid(str(id))
        return db.exec(
            select(ChatSession).where(ChatSession.session_id == str(id))
        ).first()

    if session:
        chat_session = _find(session)
    else:
        with Session(get_engine()) as tmp_session:
            chat_session = _find(tmp_session)

    if not chat_session and should_except:
        # typo fix: "identifer" -> "identifier"
        raise HTTPException(
            status_code=404, detail=f"ChatSession identifier {id} not found"
        )

    return chat_session
554
+
555
+
556
+ # -----------------
557
+ # Project functions
558
+ # -----------------
559
+
560
+
561
def create_project_by_org(
    project: Union[Project, ProjectCreate] = None,
    organization_id: Union[Organization, str] = None,
    display_name: str = None,
    session: Optional[Session] = None,
):
    """Create (or update) a project under an organization and make its upload dir.

    Args:
        project: Optional Project (updated in place) or ProjectCreate payload.
        organization_id: Organization UUID/namespace, or an Organization.
        display_name: Explicit display name; when absent, the payload's own
            name is kept, falling back to an auto-numbered default.
        session: Optional existing DB session.

    Returns:
        The persisted Project.
    """
    organization = (
        get_org_by_uuid_or_namespace(organization_id, session=session)
        if not isinstance(organization_id, Organization)
        else organization_id
    )

    # Name used when neither the caller nor the payload supplies one
    default_name = f"📁 Untitled Project #{len(organization.projects) + 1}"

    if isinstance(project, ProjectCreate):
        # The original `Project.from_orm(project) if not project else project`
        # never converted (project is always truthy in this branch), so the
        # raw ProjectCreate payload itself was added to the session.
        db_project = Project.from_orm(project)
        db_project.organization_id = organization.id
        # Prefer explicit display_name, then the payload's own, then default
        db_project.display_name = (
            display_name or db_project.display_name or default_name
        )

        if session:
            session.add(db_project)
            session.commit()
            session.refresh(db_project)
        else:
            with Session(get_engine()) as tmp_session:
                tmp_session.add(db_project)
                tmp_session.commit()
                tmp_session.refresh(db_project)
    elif isinstance(project, Project):
        db_project = project
        # Never clobber an existing name with None
        db_project.update(
            {
                "organization_id": organization.id,
                "display_name": display_name or project.display_name or default_name,
            }
        )
    else:
        db_project = Project.create(
            {
                "organization_id": organization.id,
                "display_name": display_name or default_name,
            }
        )

    # -------------------------------
    # Create project upload directory
    # -------------------------------
    project_dir = os.path.join(
        FILE_UPLOAD_PATH, str(organization.uuid), str(db_project.uuid)
    )
    os.makedirs(project_dir, exist_ok=True)

    return db_project
623
+
624
+
625
def get_project_by_uuid(
    uuid: Union[UUID, str] = None,
    organization_id: Union[UUID, str] = None,
    session: Optional[Session] = None,
    should_except: bool = True,
):
    """Fetch a project by UUID within an organization.

    Args:
        uuid: Project UUID (422 if not a valid UUID).
        organization_id: Organization UUID or namespace.
        session: Optional existing DB session.
        should_except: When True, raise 404 if nothing matches.
    """
    if not is_uuid(uuid):
        raise HTTPException(
            status_code=422, detail=f"Invalid project identifier {uuid}"
        )

    org = get_org_by_uuid_or_namespace(organization_id, session=session)

    query = select(Project).where(
        Project.organization == org, Project.uuid == str(uuid)
    )

    if session:
        project = session.exec(query).first()
    else:
        with Session(get_engine()) as db:
            project = db.exec(query).first()

    if not project and should_except:
        raise HTTPException(
            status_code=404, detail=f"Project identifier {uuid} not found"
        )

    return project
app/api/llm.py ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import openai
3
+ import json
4
+ from langchain.docstore.document import Document as LangChainDocument
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from fastapi import HTTPException
7
+ from uuid import UUID, uuid4
8
+ from langchain.text_splitter import (
9
+ CharacterTextSplitter,
10
+ MarkdownTextSplitter
11
+ )
12
+ from sqlmodel import (
13
+ Session,
14
+ text
15
+ )
16
+ from util import (
17
+ sanitize_input,
18
+ sanitize_output
19
+ )
20
+ from langchain import OpenAI
21
+ from typing import (
22
+ List,
23
+ Union,
24
+ Optional,
25
+ Dict,
26
+ Tuple,
27
+ Any
28
+ )
29
+ from helpers import (
30
+ get_user_by_uuid_or_identifier,
31
+ get_chat_session_by_uuid
32
+ )
33
+ from models import (
34
+ User,
35
+ Organization,
36
+ Project,
37
+ Node,
38
+ ChatSession,
39
+ ChatSessionResponse,
40
+ get_engine
41
+ )
42
+ from config import (
43
+ CHANNEL_TYPE,
44
+ DOCUMENT_TYPE,
45
+ LLM_MODELS,
46
+ LLM_DISTANCE_THRESHOLD,
47
+ LLM_DEFAULT_TEMPERATURE,
48
+ LLM_MAX_OUTPUT_TOKENS,
49
+ LLM_CHUNK_SIZE,
50
+ LLM_CHUNK_OVERLAP,
51
+ LLM_MIN_NODE_LIMIT,
52
+ LLM_DEFAULT_DISTANCE_STRATEGY,
53
+ VECTOR_EMBEDDINGS_COUNT,
54
+ DISTANCE_STRATEGY,
55
+ AGENT_NAMES,
56
+ logger
57
+ )
58
+
59
+
60
# -------------
# Query the LLM
# -------------
def chat_query(
    query_str: str,
    session_id: Optional[Union[str, UUID]] = None,
    meta: Optional[Dict[str, Any]] = None,
    channel: Optional[CHANNEL_TYPE] = None,
    identifier: Optional[str] = None,
    project: Optional[Project] = None,
    organization: Optional[Organization] = None,
    session: Optional[Session] = None,
    user_data: Optional[Dict[str, Any]] = None,
    distance_strategy: Optional[DISTANCE_STRATEGY] = DISTANCE_STRATEGY.EUCLIDEAN,
    distance_threshold: Optional[float] = LLM_DISTANCE_THRESHOLD,
    node_limit: Optional[int] = LLM_MIN_NODE_LIMIT,
    model: Optional[LLM_MODELS] = LLM_MODELS.GPT_35_TURBO,
    max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS,
) -> ChatSessionResponse:
    """
    Answer a user query via retrieval-augmented generation and persist the
    exchange as a ChatSession.

    Steps:
    1. Clean user input
    2. Create input embeddings
    3. Search for similar nodes
    4. Create prompt template w/ similar nodes
    5. Submit prompt template to LLM
    6. Get response from LLM
    7. Create ChatSession (embeddings, tags, is_escalate)
    8. Return response
    """
    # BUG FIX: start from a copy of the caller-supplied metadata instead of
    # resetting it to {} (which silently dropped e.g. webhook session
    # metadata). The default was also changed from a shared mutable {} to None.
    meta = dict(meta) if meta else {}
    agent_name = None
    embeddings = []
    tags = []
    is_escalate = False
    response_message = None
    prompt = None
    context_str = None
    # BUG FIX: the original tested isinstance(model, OpenAI), which is never
    # true for an LLM_MODELS enum member, so the per-model limit was unused.
    # getattr keeps a safe fallback in case a member lacks token_limit —
    # TODO confirm all LLM_MODELS members define token_limit.
    MODEL_TOKEN_LIMIT = (
        getattr(model, "token_limit", LLM_MAX_OUTPUT_TOKENS)
        if isinstance(model, LLM_MODELS)
        else LLM_MAX_OUTPUT_TOKENS
    )

    # ---------------------------------------------
    # Generate a new session ID if none is provided
    # ---------------------------------------------
    prev_chat_session = (
        get_chat_session_by_uuid(session_id=session_id, session=session)
        if session_id
        else None
    )

    # If we were given an invalid session_id
    if session_id and not prev_chat_session:
        # BUG FIX: this was `return HTTPException(...)`, which handed the
        # exception object back to the caller instead of raising it
        raise HTTPException(
            status_code=404, detail=f"Chat session with ID {session_id} not found."
        )
    # If we were given a valid session_id, keep the same agent persona
    elif session_id and prev_chat_session and prev_chat_session.meta.get("agent"):
        agent_name = prev_chat_session.meta["agent"]
    # If this is a new session, generate a new ID
    else:
        session_id = str(uuid4())

    meta["agent"] = agent_name if agent_name else random.choice(AGENT_NAMES)

    # ----------------
    # Clean user input
    # ----------------
    query_str = sanitize_input(query_str)
    logger.debug(f"💬 Query received: {query_str}")

    # ----------------
    # Get token counts
    # ----------------
    query_token_count = get_token_count(query_str)
    prompt_token_count = 0

    # -----------------------
    # Create input embeddings
    # -----------------------
    arr_query, embeddings = get_embeddings(query_str)

    query_embeddings = embeddings[0]

    # ------------------------
    # Search for similar nodes
    # ------------------------
    nodes = get_nodes_by_embedding(
        query_embeddings,
        node_limit,
        distance_strategy=distance_strategy
        if isinstance(distance_strategy, DISTANCE_STRATEGY)
        else LLM_DEFAULT_DISTANCE_STRATEGY,
        distance_threshold=distance_threshold,
        session=session,
    )

    if len(nodes) > 0:
        if (not project or not organization) and session:
            # Derive the project/org from the best-matching node's document
            document = session.get(Node, nodes[0].id).document
            project = document.project
            organization = project.organization

        # ----------------------
        # Create prompt template
        # ----------------------

        # Concatenate all nodes into a single context string
        context_str = "\n\n".join([node.text for node in nodes])

        # -------------------------------------------
        # Let's make sure we don't exceed token limit
        # -------------------------------------------
        context_token_count = get_token_count(context_str)

        # If the combined input would exceed the model budget, truncate the
        # retrieved context (the user query itself is kept intact)
        if (
            context_token_count + query_token_count + prompt_token_count
        ) > MODEL_TOKEN_LIMIT:
            logger.debug("🚧 Exceeded token limit, truncating context")
            token_delta = MODEL_TOKEN_LIMIT - (query_token_count + prompt_token_count)
            context_str = context_str[:token_delta]

        # create prompt template
        system_prompt, user_prompt = get_prompt_template(
            user_query=query_str,
            context_str=context_str,
            project=project,
            organization=organization,
            agent=agent_name,
        )
        # BUG FIX: `prompt` was never assigned, so prompt tokens were always
        # counted as zero. Track the user-facing prompt that is sent below.
        prompt = user_prompt

        prompt_token_count = get_token_count(prompt)
        token_count = context_token_count + query_token_count + prompt_token_count

        # ---------------------------
        # Get response from LLM model
        # ---------------------------
        # The system prompt instructs the model to reply with a JSON object
        llm_response = json.loads(
            retrieve_llm_response(
                user_prompt,
                model=model,
                max_output_tokens=max_output_tokens,
                prefix_messages=system_prompt,
            )
        )
        tags = llm_response.get("tags", [])
        is_escalate = llm_response.get("is_escalate", False)
        response_message = llm_response.get("message", None)
    else:
        logger.info("🚫📝 No similar nodes found, returning default response")

    # ----------------
    # Get user details
    # ----------------
    user = get_user_by_uuid_or_identifier(
        identifier, session=session, should_except=False
    )

    if not user:
        logger.debug("🚫👤 User not found, creating new user")
        user_params = {
            "identifier": identifier,
            "identifier_type": channel.value
            if isinstance(channel, CHANNEL_TYPE)
            else channel,
        }
        if user_data:
            user_params = {**user_params, **user_data}

        user = User.create(user_params)
    else:
        logger.debug(f"👤 User found: {user}")

    # -----------------------------------
    # Calculate input and response tokens
    # -----------------------------------
    token_count = get_token_count(prompt) + get_token_count(response_message)

    # ---------------
    # Add to meta tag
    # ---------------
    if tags:
        meta["tags"] = tags

    meta["is_escalate"] = is_escalate

    if session_id:
        meta["session_id"] = session_id

    # ------------------
    # Create ChatSession
    # ------------------
    chat_session = ChatSession(
        user_id=user.id,
        session_id=session_id,
        project_id=project.id if project else None,
        channel=channel.value if isinstance(channel, CHANNEL_TYPE) else channel,
        user_message=query_str,
        embeddings=query_embeddings,
        token_count=token_count if token_count > 0 else None,
        response=response_message,
        meta=meta,
    )

    if session:
        session.add(chat_session)
        session.commit()
        session.refresh(chat_session)
    else:
        with Session(get_engine()) as db_session:
            db_session.add(chat_session)
            db_session.commit()
            db_session.refresh(chat_session)

    return chat_session
296
+
297
+
298
# ------------------------------
# Retrieve a random agent's name
# ------------------------------
def get_random_agent():
    """Return a randomly chosen agent display name from the configured pool."""
    agent_pool = AGENT_NAMES
    return random.choice(agent_pool)
303
+
304
+
305
# ------------------------
# Retrieve prompt template
# ------------------------
def get_prompt_template(
    user_query: str = None,
    context_str: str = None,
    project: Optional[Project] = None,
    organization: Optional[Organization] = None,
    agent: str = None,
) -> Tuple[List[Dict[str, str]], str]:
    """
    Build the (system_prompt, user_prompt) pair for the chat completion.

    Returns:
        A tuple of:
        - system_prompt: single-element list of OpenAI-style message dicts
        - user_prompt: the "[USER]:"-prefixed query string

    Raises:
        ValueError: if context_str or user_query is missing.

    NOTE: the original annotated the return type as ``str`` even though it
    returns a tuple; the annotation has been corrected.
    """
    # Validate required inputs up front (previously validation ran after
    # normalization, making the empty-string normalization dead code)
    if not context_str or not user_query:
        raise ValueError(
            "Missing required arguments context_str, user_query, organization, agent"
        )

    # Optional persona name, rendered as "<name>, " inside the prompt
    agent = f"{agent}, " if agent else ""

    # Prefer the project's owning organization; fall back to the explicit
    # organization argument, else None (rendered literally in the prompt)
    organization = (
        project.organization.display_name
        if project
        else organization.display_name
        if organization
        else None
    )

    system_prompt = [
        {
            "role": "system",
            "content": f"""[AGENT]:
I am {agent} a very kind and enthusiastic customer support agent who loves to help customers. I am working on the behalf of "{organization}"

Given the following document from "{organization}", I will answer the [USER] questions using only the [DOCUMENT] and following the [RULES].

[DOCUMENT]:
{context_str}

[RULES]:
I will answer the user's questions using only the [DOCUMENT] provided. I will abide by the following rules:
- I am a kind and helpful human, the best customer support agent in existence
- I never lie or invent answers not explicitly provided in [DOCUMENT]
- If I am unsure of the answer response or the answer is not explicitly contained in [DOCUMENT], I will say: "I apologize, I'm not sure how to help with that".
- I always keep my answers short, relevant and concise.
- I will always respond in JSON format with the following keys: "message" my response to the user, "tags" an array of short labels categorizing user input, "is_escalate" a boolean, returning false if I am unsure and true if I do have a relevant answer
""",
        }
    ]

    return (system_prompt, f"[USER]:\n{user_query}")
354
+
355
+
356
# ----------------------------
# Get the count of tokens used
# ----------------------------
# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
def get_token_count(text: str):
    """Return the number of LLM tokens in *text*, or 0 for empty/None input."""
    if text:
        return OpenAI().get_num_tokens(text=text)
    return 0
365
+
366
+
367
# --------------------------------------------
# Query embedding search for similar documents
# --------------------------------------------
def get_nodes_by_embedding(
    embeddings: List[float],
    k: int = LLM_MIN_NODE_LIMIT,
    distance_strategy: Optional[DISTANCE_STRATEGY] = LLM_DEFAULT_DISTANCE_STRATEGY,
    distance_threshold: Optional[float] = LLM_DISTANCE_THRESHOLD,
    session: Optional[Session] = None,
) -> List[Node]:
    """
    Run a pgvector similarity search and return the top-k matching Nodes.

    The strategy selects which SQL helper function is invoked; results are
    rehydrated into Node objects via Node.by_uuid.
    """
    # Map each strategy onto its pgvector SQL helper function
    strategy_fns = {
        DISTANCE_STRATEGY.EUCLIDEAN: "match_node_euclidean",
        DISTANCE_STRATEGY.COSINE: "match_node_cosine",
        DISTANCE_STRATEGY.MAX_INNER_PRODUCT: "match_node_max_inner_product",
    }
    distance_fn = strategy_fns.get(distance_strategy)
    if distance_fn is None:
        raise Exception(f"Invalid distance strategy {distance_strategy}")

    # Convert embeddings array into sql string
    embeddings_str = str(embeddings)

    # ---------------------------
    # Lets do a similarity search
    # ---------------------------
    sql = f"""SELECT * FROM {distance_fn}(
    '{embeddings_str}'::vector({VECTOR_EMBEDDINGS_COUNT}),
    {float(distance_threshold)}::double precision,
    {int(k)});"""

    # logger.debug(f'🔍 Query: {sql}')

    # Execute on the provided session, or open a short-lived one
    if session:
        rows = session.exec(text(sql)).all()
    else:
        with Session(get_engine()) as db_session:
            rows = db_session.exec(text(sql)).all()

    # First column of each row is the node UUID — TODO confirm against the
    # match_node_* SQL function definitions
    return [Node.by_uuid(str(row[0])) for row in rows] if rows else []
407
+
408
+
409
def retrieve_llm_response(
    query_str: str,
    model: Optional[LLM_MODELS] = LLM_MODELS.GPT_35_TURBO,
    temperature: Optional[float] = LLM_DEFAULT_TEMPERATURE,
    max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS,
    prefix_messages: Optional[List[dict]] = None,
):
    """
    Send a prompt to the LLM and return its sanitized completion text.

    Args:
        query_str: the user-facing prompt string.
        model: LLM_MODELS member selecting the model; anything else falls
            back to GPT_35_TURBO.
        temperature / max_output_tokens: sampling controls.
        prefix_messages: system messages prepended to the conversation.

    Raises:
        HTTPException(500): when the OpenAI API rejects the request.
    """
    llm = OpenAI(
        temperature=temperature,
        model_name=model.model_name
        if isinstance(model, LLM_MODELS)
        else LLM_MODELS.GPT_35_TURBO.model_name,
        max_tokens=max_output_tokens,
        prefix_messages=prefix_messages,
    )
    try:
        result = llm(prompt=query_str)
    except openai.error.InvalidRequestError as e:
        logger.error(f"🚨 LLM error: {e}")
        # Chain the original exception so the root cause stays visible
        # in tracebacks (the original raised without `from e`)
        raise HTTPException(status_code=500, detail=f"LLM error: {e}") from e
    logger.debug(f"💬 LLM result: {str(result)}")
    return sanitize_output(result)
431
+
432
+
433
# --------------------------
# Create document embeddings
# --------------------------
def get_embeddings(
    document_data: str,
    document_type: DOCUMENT_TYPE = DOCUMENT_TYPE.PLAINTEXT,
) -> Tuple[List[str], List[float]]:
    """
    Split a document into chunks and embed each chunk with OpenAI.

    Returns a tuple of (chunk strings, per-chunk embedding vectors).
    """
    documents = [LangChainDocument(page_content=document_data)]
    logger.debug(documents)

    # Markdown gets a structure-aware splitter; anything else is split
    # by characters
    splitter_cls = (
        MarkdownTextSplitter
        if document_type == DOCUMENT_TYPE.MARKDOWN
        else CharacterTextSplitter
    )
    doc_splitter = splitter_cls(
        chunk_size=LLM_CHUNK_SIZE, chunk_overlap=LLM_CHUNK_OVERLAP
    )

    # Split, then flatten the Documents down to plain strings for OpenAI
    arr_documents = [
        doc.page_content for doc in doc_splitter.split_documents(documents)
    ]

    # https://github.com/hwchase17/langchain/blob/d18b0caf0e00414e066c9903c8df72bb5bcf9998/langchain/embeddings/openai.py#L219
    embeddings = OpenAIEmbeddings().embed_documents(
        texts=arr_documents, chunk_size=LLM_CHUNK_SIZE
    )

    return arr_documents, embeddings
app/api/main.py ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import (
2
+ FastAPI,
3
+ File,
4
+ Depends,
5
+ HTTPException,
6
+ UploadFile
7
+ )
8
+ from fastapi.openapi.utils import get_openapi
9
+ from fastapi.staticfiles import StaticFiles
10
+ from sqlmodel import Session, select
11
+
12
+ from typing import (
13
+ List,
14
+ Optional,
15
+ Union,
16
+ Any
17
+ )
18
+ from datetime import datetime
19
+ import requests
20
+ import aiohttp
21
+ import time
22
+ import json
23
+ import os
24
+
25
+ # -----------
26
+ # LLM imports
27
+ # -----------
28
+ from llm import (
29
+ chat_query
30
+ )
31
+
32
+ # ----------------
33
+ # Database imports
34
+ # ----------------
35
+ from models import (
36
+ # ---------------
37
+ # Database Models
38
+ # ---------------
39
+ Organization,
40
+ OrganizationCreate,
41
+ OrganizationRead,
42
+ OrganizationUpdate,
43
+ User,
44
+ UserCreate,
45
+ UserRead,
46
+ UserReadList,
47
+ UserUpdate,
48
+ DocumentRead,
49
+ DocumentReadList,
50
+ ProjectCreate,
51
+ ProjectRead,
52
+ ProjectReadList,
53
+ ChatSessionResponse,
54
+ ChatSessionCreatePost,
55
+ WebhookCreate,
56
+ # ------------------
57
+ # Database functions
58
+ # ------------------
59
+ get_engine,
60
+ get_session
61
+
62
+ )
63
from helpers import (
    # ----------------
    # Helper functions
    # ----------------
    get_org_by_uuid_or_namespace,
    get_project_by_uuid,
    get_user_by_uuid_or_identifier,
    get_users,
    get_documents_by_project_and_org,
    get_document_by_uuid,
    create_org_by_org_or_uuid,
    create_project_by_org,
    create_document_by_file_path  # used by upload_document; was missing
)
76
+ from util import (
77
+ save_file,
78
+ get_sha256,
79
+ is_uuid,
80
+ logger
81
+ )
82
+ # -----------
83
+ # LLM imports
84
+ # -----------
85
+ from config import (
86
+ APP_NAME,
87
+ APP_VERSION,
88
+ APP_DESCRIPTION,
89
+ ENTITY_STATUS,
90
+ CHANNEL_TYPE,
91
+ LLM_MODELS,
92
+ LLM_DISTANCE_THRESHOLD,
93
+ LLM_DEFAULT_DISTANCE_STRATEGY,
94
+ LLM_MAX_OUTPUT_TOKENS,
95
+ LLM_MIN_NODE_LIMIT,
96
+ FILE_UPLOAD_PATH,
97
+ RASA_WEBHOOK_URL
98
+ )
99
+
100
+
101
+ # ------------------
102
+ # Mount static files
103
+ # ------------------
104
+
105
+ # TODO: implement this: https://fastapi.tiangolo.com/advanced/extending-openapi/#change-the-theme
106
+ # See if you can style it to the Radix UI theme
107
+
108
+
109
# FastAPI application instance; routes are registered via decorators below
app = FastAPI()

# Serve bundled assets (e.g. the logo images used by the customized
# OpenAPI docs) from ./static under the /static URL path
app.mount("/static", StaticFiles(directory="static"), name="static")
112
+
113
# Health check endpoint
# ---------------------
@app.get("/health", include_in_schema=False)
def health_check():
    # Liveness probe; intentionally hidden from the OpenAPI schema
    return {'status': 'ok'}
118
+
119
+
120
+ # ======================
121
+ # ORGANIZATION ENDPOINTS
122
+ # ======================
123
+
124
# ---------------------
# Get all organizations
# ---------------------
@app.get("/org", response_model=List[OrganizationRead])
def read_organizations():
    '''
    ## Get all active organizations

    Returns:
        List[OrganizationRead]: List of organizations

    '''
    # Only organizations whose status is ACTIVE are returned
    statement = select(Organization).where(
        Organization.status == ENTITY_STATUS.ACTIVE.value
    )
    with Session(get_engine()) as session:
        return session.exec(statement).all()
139
+
140
+
141
# ----------------------
# Create an organization
# ----------------------
@app.post("/org", response_model=Union[OrganizationRead, Any])
def create_organization(
    *,
    session: Session = Depends(get_session),
    organization: Optional[OrganizationCreate] = None,
    namespace: Optional[str] = None,
    display_name: Optional[str] = None
):
    '''

    ### Creates a new organization
    ### <u>Args:</u>
    - **namespace**: Unique namespace for the organization (ex. openai)
    - **name**: Name of the organization (ex. OpenAI)
    - **bot_url**: URL of the bot (ex. https://t.me/your_bot)

    ### <u>Returns:</u>
    - OrganizationRead
    ---
    <details><summary>👇 💻 Code examples:</summary>
    ### 🖥️ Curl
    ```bash
    curl -X POST "http://localhost:8888/org" -H "accept: application/json" -H "Content-Type: application/json" -d '{\"namespace\":\"openai\",\"name\":\"OpenAI\",\"bot_url\":\"https://t.me/your_bot\"}'
    ```
    <br/>
    ### 🐍 Python
    ```python
    import requests
    response = requests.post("http://localhost:8888/org", json={"namespace":"openai","name":"OpenAI","bot_url":"https://t.me/your_bot"})
    print(response.json())
    ```
    </details>
    '''
    # Accepts either a full OrganizationCreate payload or bare
    # namespace/display_name fields; the helper reconciles the two forms.
    # NOTE(review): the docstring above documents `name`/`bot_url`, which do
    # not match this signature — verify against OrganizationCreate.
    # Create organization
    return create_org_by_org_or_uuid(
        organization=organization,
        namespace=namespace,
        display_name=display_name, session=session
    )
183
+
184
+
185
# ---------------------------
# Get an organization by UUID
# ---------------------------
@app.get("/org/{organization_id}", response_model=Union[OrganizationRead, Any])
def read_organization(
    *,
    session: Session = Depends(get_session),
    organization_id: str
):
    # The helper accepts either a UUID or a namespace and raises its own
    # HTTP error when the organization cannot be found
    return get_org_by_uuid_or_namespace(organization_id, session=session)
198
+
199
+
200
# ------------------------------
# Update an organization by UUID
# ------------------------------
@app.put("/org/{organization_id}", response_model=Union[OrganizationRead, Any])
def update_organization(
    *,
    session: Session = Depends(get_session),
    organization_id: str,
    organization: OrganizationUpdate
):
    # Resolve the target org (helper raises if missing), then apply only
    # the fields the caller actually supplied
    target = get_org_by_uuid_or_namespace(organization_id, session=session)
    changes = organization.dict(exclude_unset=True)
    target.update(changes)
    return target
215
+
216
+
217
+ # =================
218
+ # Project endpoints
219
+ # =================
220
+
221
# -----------------------
# Get all projects by org
# -----------------------
@app.get("/project", response_model=List[ProjectReadList])
def read_projects(
    *,
    session: Session = Depends(get_session),
    organization_id: str
):
    # The helper 404s if the organization itself is unknown
    organization = get_org_by_uuid_or_namespace(organization_id, session=session)

    projects = organization.projects
    # An organization without projects is also reported as a 404
    if not projects:
        raise HTTPException(status_code=404, detail='No projects found for organization')

    return projects
237
+
238
+
239
# -----------------------
# Create a project by org
# -----------------------
@app.post("/project", response_model=Union[ProjectRead, Any])
def create_project(
    *,
    session: Session = Depends(get_session),
    organization_id: str,
    project: ProjectCreate
):
    # Creation (and org resolution) is delegated to the shared helper
    return create_project_by_org(
        organization_id=organization_id, project=project, session=session
    )
254
+
255
+
256
# -----------------------------
# Get a project by UUID and org
# -----------------------------
@app.get("/project/{project_id}", response_model=Union[ProjectRead, Any])
def read_project(
    *,
    session: Session = Depends(get_session),
    organization_id: str,
    project_id: str
):
    # Scoped lookup: the project must belong to the given organization
    return get_project_by_uuid(
        uuid=project_id,
        organization_id=organization_id,
        session=session,
    )
268
+
269
+
270
+ # ==================
271
+ # DOCUMENT ENDPOINTS
272
+ # ==================
273
+
274
# ---------------
# Upload document
# ---------------
@app.post("/document", response_model=Union[DocumentReadList, Any])
async def upload_document(
    *,
    session: Session = Depends(get_session),
    organization_id: str,
    project_id: str,
    url: Optional[str] = None,
    # BUG FIX: was File(...), which made the file mandatory and broke the
    # URL-only upload path
    file: Optional[UploadFile] = File(None),
    overwrite: Optional[bool] = True
):
    """
    Ingest a document into a project, from either an uploaded file or a URL
    (exactly one of the two must be provided).
    """
    organization = get_org_by_uuid_or_namespace(organization_id, session=session)
    project = get_project_by_uuid(uuid=project_id, organization_id=organization_id, session=session)
    file_root_path = os.path.join(FILE_UPLOAD_PATH, str(organization.uuid), str(project.uuid))

    file_version = 1

    # ------------------------
    # Enforce XOR for url/file
    # ------------------------
    if url and file:
        raise HTTPException(status_code=400, detail='You can only upload a file OR provide a URL, not both')
    # BUG FIX: the neither-provided case previously fell through to the file
    # branch and crashed on file.filename
    if not url and not file:
        raise HTTPException(status_code=400, detail='You must upload a file OR provide a URL')

    # --------------------
    # Upload file from URL
    # --------------------
    if url:
        file_name = url.split('/')[-1]
        file_upload_path = os.path.join(file_root_path, file_name)

        # Avoid clobbering an existing file by suffixing a timestamp
        if os.path.isfile(file_upload_path):
            file_name = f'{file_name}_{int(time.time())}'
            file_upload_path = os.path.join(file_root_path, file_name)

        # BUG FIX: use a distinct name for the HTTP session. The original
        # bound it to `session`, permanently shadowing the DB session
        # parameter, so a closed aiohttp session was later passed to
        # create_document_by_file_path.
        async with aiohttp.ClientSession() as http_session:
            async with http_session.get(url) as resp:
                if resp.status != 200:
                    raise HTTPException(status_code=400, detail=f'Could not download file from {url}')

                with open(file_upload_path, 'wb') as f:
                    while True:
                        chunk = await resp.content.read(1024)
                        if not chunk:
                            break
                        f.write(chunk)

        # BUG FIX: the original leaked the read handle via
        # open(...).read(); use a context manager
        with open(file_upload_path, 'rb') as f:
            file_contents = f.read()
        file_hash = get_sha256(contents=file_contents)

    # -----------------------
    # Upload file from device
    # -----------------------
    else:
        file_name = file.filename
        file_upload_path = os.path.join(file_root_path, file_name)

        if os.path.isfile(file_upload_path):
            file_name = f'{file_name}_{int(time.time())}'
            file_upload_path = os.path.join(file_root_path, file_name)

        file_contents = await file.read()
        file_hash = get_sha256(contents=file_contents)
        await save_file(file, file_upload_path)

    # Persist the document record (and downstream processing) via helper
    document_obj = create_document_by_file_path(
        organization=organization,
        project=project,
        file_path=file_upload_path,
        file_hash=file_hash,
        file_version=file_version,
        url=url,
        overwrite=overwrite,
        session=session
    )
    return document_obj
353
+
354
+
355
# --------------------------------
# List all documents for a project
# --------------------------------
@app.get("/document", response_model=List[DocumentReadList])
def read_documents(
    *,
    session: Session = Depends(get_session),
    organization_id: str,
    project_id: str
):
    # Scoping and not-found handling live in the helper
    return get_documents_by_project_and_org(
        project_id=project_id,
        organization_id=organization_id,
        session=session,
    )
366
+
367
# ----------------------
# Get a document by UUID
# ----------------------
@app.get("/document/{document_id}", response_model=DocumentRead)
def read_document(
    *,
    session: Session = Depends(get_session),
    organization_id: str,
    project_id: str,
    document_id: str
):
    # Scoped lookup: document must belong to the given project and org
    return get_document_by_uuid(
        uuid=document_id,
        project_id=project_id,
        organization_id=organization_id,
        session=session,
    )
379
+
380
+
381
+ # ==============
382
+ # USER ENDPOINTS
383
+ # ==============
384
+
385
# -------------
# Get all users
# -------------
@app.get("/user", response_model=List[UserReadList])
def read_users(
    *,
    session: Session = Depends(get_session),
):
    # Thin pass-through to the shared helper
    return get_users(session=session)
394
+
395
+
396
# -------------
# Create a user
# -------------
@app.post("/user", response_model=UserRead)
def create_user(
    *,
    session: Session = Depends(get_session),
    user: UserCreate
):
    # BUG FIX: the original returned create_user(user=user, session=session),
    # calling this endpoint function itself and recursing until stack
    # overflow. Persist via the model helper instead — User.create is the
    # same persistence path used by llm.chat_query; it manages its own
    # session internally (TODO: thread `session` through if the helper
    # grows support for it).
    return User.create(user.dict())
410
+
411
+
412
# ------------------
# Get a user by UUID
# ------------------
@app.get("/user/{user_uuid}", response_model=UserRead)
def read_user(
    *,
    session: Session = Depends(get_session),
    user_uuid: str
):
    # BUG FIX: the parameter was named `user_id`, which did not match the
    # `{user_uuid}` path placeholder, so the path value was never bound
    # (FastAPI treated `user_id` as a required query parameter instead).
    # Also pass the identifier positionally, matching how the helper is
    # invoked in llm.chat_query (it previously used an `id=` keyword that
    # does not match that usage).
    return get_user_by_uuid_or_identifier(user_uuid, session=session)
423
+
424
+
425
# ---------------------
# Update a user by UUID
# ---------------------
@app.put("/user/{user_uuid}", response_model=UserRead)
def update_user(*, user_uuid: str, user: UserUpdate):
    # BUG FIX: the original rebound `user` to the fetched DB record and then
    # ran user.update(**user.dict()) — updating the record with its own data
    # (a no-op) and discarding the caller's payload. Keep the payload and
    # the DB record in separate names.
    db_user = User.get(uuid=user_uuid)

    if db_user:
        # Apply only fields the caller actually supplied, mirroring the
        # update_organization endpoint
        db_user.update(user.dict(exclude_unset=True))
        return db_user

    # If user doesn't exist, return 404
    raise HTTPException(status_code=404, detail=f'User {user_uuid} not found!')
442
+
443
+
444
+ # =============
445
+ # LLM ENDPOINTS
446
+ # =============
447
+
448
+
449
def process_webhook_telegram(webhook_data: dict):
    """
    Normalize a Telegram webhook payload into a flat dict of the fields
    the API cares about. Missing sections yield None values rather than
    raising.

    Telegram example response:
    {
        "update_id": 248146407,
        "message": {
            "message_id": 299,
            "from": {
                "id": 123456789,
                "is_bot": false,
                "first_name": "Elon",
                "username": "elonmusk",
                "language_code": "en"
            },
            "chat": {
                "id": 123456789,
                "first_name": "Elon",
                "username": "elonmusk",
                "type": "private"
            },
            "date": 1683115867,
            "text": "Tell me about the company?"
        }
    }
    """
    # BUG FIX: guard against partial payloads — the original called
    # .get(...) on None when "message" (or its sub-objects) was absent,
    # raising AttributeError instead of yielding None fields.
    message = webhook_data.get('message') or {}
    chat = message.get('chat') or {}
    message_from = message.get('from') or {}
    message_date = message.get('date', None)
    return {
        'update_id': webhook_data.get('update_id', None),
        'message_id': message.get('message_id', None),
        'user_id': message_from.get('id', None),
        'username': message_from.get('username', None),
        'user_language': message_from.get('language_code', None),
        # NOTE(review): first_name is read from "chat", not "from" —
        # equivalent for private chats, but verify for group chats
        'user_firstname': chat.get('first_name', None),
        'user_message': message.get('text', None),
        'message_ts': datetime.fromtimestamp(message_date) if message_date else None,
        'message_type': chat.get('type', None)
    }
488
+
489
+
490
@app.post("/webhooks/{channel}/webhook")
def get_webhook(
    *,
    session: Session = Depends(get_session),
    channel: str,
    webhook: WebhookCreate
):
    """
    Receive a channel webhook (currently Telegram only), answer the user's
    message via the LLM, attach the result to the payload and forward it to
    the Rasa webhook.
    """
    webhook_data = webhook.dict()

    # --------------------
    # Get webhook metadata
    # --------------------
    if channel == 'telegram':
        rasa_webhook_url = f'{RASA_WEBHOOK_URL}/webhooks/{channel}/webhook'
        data = process_webhook_telegram(webhook_data)
        channel = CHANNEL_TYPE.TELEGRAM.value
        user_data = {
            'identifier': data['user_id'],
            'identifier_type': channel,
            'first_name': data['user_firstname'],
            'language': data['user_language']
        }
        session_metadata = {
            'update_id': data['update_id'],
            'username': data['username'],
            # BUG FIX: this previously stored the message *text*
            # (data['user_message']) under `message_id`; use the actual
            # Telegram message id
            'message_id': data['message_id'],
            'msg_ts': data['message_ts'],
            'msg_type': data['message_type'],
        }
        user_message = data['user_message']
    else:
        # Not a valid channel, return 404
        raise HTTPException(status_code=404, detail=f'Channel {channel} not a valid webhook channel!')

    chat_session = chat_query(
        user_message,
        session=session,
        channel=channel,
        identifier=user_data['identifier'],
        user_data=user_data,
        meta=session_metadata
    )

    meta = chat_session.meta

    # -----------------------------------------
    # Lets add the LLM response to the metadata
    # -----------------------------------------
    webhook_data['message']['meta'] = {
        'response': chat_session.response if chat_session.response else None,
        'tags': meta['tags'] if 'tags' in meta else None,
        'is_escalate': meta['is_escalate'] if 'is_escalate' in meta else False,
        'session_id': meta['session_id'] if 'session_id' in meta else None
    }

    # -----------------------------------
    # Forward the webhook to Rasa webhook
    # -----------------------------------
    res = requests.post(rasa_webhook_url, data=json.dumps(webhook_data))
    logger.debug(f'[🤖 RasaGPT API webhook]\nPosting data: {json.dumps(webhook_data)}\n\n[🤖 RasaGPT API webhook]\nRasa webhook response: {res.text}')

    return {'status': 'ok'}
553
+
554
+
555
# ------------------
# Customize API docs
# ------------------
# Build the OpenAPI schema up front so branding can be injected into it
_schema = get_openapi(
    title=APP_NAME,
    description=APP_DESCRIPTION,
    version=APP_VERSION,
    routes=app.routes,
)
# The x-logo vendor extension is rendered by ReDoc-style doc viewers;
# the image is served by the /static mount configured above
_schema['info']['x-logo'] = {
    'url': '/static/img/rasagpt-logo-1.png'
}
# Assigning openapi_schema makes FastAPI serve this cached schema as-is
app.openapi_schema = _schema
app/api/models.py ADDED
@@ -0,0 +1,660 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy.dialects.postgresql import JSONB
2
+ from sqlalchemy.orm import declared_attr
3
+ from pgvector.sqlalchemy import Vector
4
+ from sqlalchemy import Column
5
+ from datetime import datetime
6
+ from util import snake_case
7
+ import uuid as uuid_pkg
8
+
9
+ from sqlmodel import (
10
+ UniqueConstraint,
11
+ create_engine,
12
+ Relationship,
13
+ SQLModel,
14
+ Session,
15
+ select,
16
+ Field,
17
+ )
18
+ from typing import (
19
+ Optional,
20
+ Union,
21
+ List,
22
+ Dict,
23
+ Any
24
+ )
25
+ from config import (
26
+ LLM_DEFAULT_DISTANCE_STRATEGY,
27
+ VECTOR_EMBEDDINGS_COUNT,
28
+ LLM_MAX_OUTPUT_TOKENS,
29
+ DISTANCE_STRATEGIES,
30
+ LLM_MIN_NODE_LIMIT,
31
+ PGVECTOR_ADD_INDEX,
32
+ ENTITY_STATUS,
33
+ CHANNEL_TYPE,
34
+ LLM_MODELS,
35
+ DB_USER,
36
+ SU_DSN,
37
+ logger,
38
+ )
39
+
40
+
41
# ==========
# Base model
# ==========
class BaseModel(SQLModel):
    """Common persistence helpers shared by all table models.

    Subclasses get a snake_case table name derived from the class name,
    plus create / update / soft-delete helpers that each open a fresh
    session against the default engine.
    """

    @declared_attr
    def __tablename__(cls) -> str:
        # Table name is the snake_case form of the class name.
        return snake_case(cls.__name__)

    @classmethod
    def by_uuid(cls, _uuid: uuid_pkg.UUID):
        """Return the first row whose ``uuid`` matches, or None.

        (Fixed: the class-method argument was named ``self``, which was
        misleading — it receives the class, not an instance.)
        """
        with Session(get_engine()) as session:
            q = select(cls).where(cls.uuid == _uuid)
            # .first() already returns None when there is no match
            return session.exec(q).first()

    def update(self, o: Union[SQLModel, dict] = None):
        """Apply the fields of ``o`` (model or dict) to this row and persist.

        Raises:
            ValueError: if ``o`` is falsy.
        """
        if not o:
            raise ValueError("Must provide a model or dict to update values")
        values = o if isinstance(o, dict) else o.dict(exclude_unset=True)
        for key, value in values.items():
            setattr(self, key, value)

        # save and commit to database
        with Session(get_engine()) as session:
            session.add(self)
            session.commit()
            session.refresh(self)

    def delete(self):
        """Soft-delete: mark the row DELETED rather than removing it."""
        with Session(get_engine()) as session:
            self.status = ENTITY_STATUS.DELETED
            self.updated_at = datetime.utcnow()
            session.add(self)
            session.commit()
            session.refresh(self)

    @classmethod
    def create(cls, o: Union[SQLModel, dict] = None):
        """Insert a new row built from a model or dict; return the refreshed object.

        Raises:
            ValueError: if ``o`` is falsy.
        """
        if not o:
            raise ValueError("Must provide a model or dict to update values")

        with Session(get_engine()) as session:
            obj = cls.from_orm(o) if isinstance(o, SQLModel) else cls(**o)
            session.add(obj)
            session.commit()
            session.refresh(obj)

        return obj
89
+
90
+
91
# ============
# Organization
# ============
class Organization(BaseModel, table=True):
    """Top-level tenant: owns projects and their documents."""

    id: Optional[int] = Field(default=None, primary_key=True)
    # Public identifier for the organization
    uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4)
    # Display name of the organization
    display_name: Optional[str] = Field(default="Untitled Organization 😊", index=True)
    # Unique organization namespace for URLs, etc.
    namespace: str = Field(unique=True, index=True)
    # URL for the bot
    bot_url: Optional[str] = Field(default=None)
    status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value)
    created_at: Optional[datetime] = Field(default_factory=datetime.now)
    updated_at: Optional[datetime] = Field(default_factory=datetime.now)

    # -------------
    # Relationships
    # -------------
    projects: Optional[List["Project"]] = Relationship(back_populates="organization")
    documents: Optional[List["Document"]] = Relationship(back_populates="organization")

    @property
    def project_count(self) -> int:
        # Number of projects owned by this organization
        return len(self.projects)

    @property
    def document_count(self) -> int:
        # Number of documents owned by this organization
        return len(self.documents)

    def __repr__(self):
        return (
            f"<Organization id={self.id} name={self.display_name} "
            f"namespace={self.namespace} uuid={self.uuid}>"
        )
126
+
127
+
128
class OrganizationCreate(SQLModel):
    """Request payload for creating an organization."""
    display_name: Optional[str]
    namespace: Optional[str]
    bot_url: Optional[str]


class OrganizationRead(SQLModel):
    """Organization representation returned by read endpoints."""
    id: int
    uuid: uuid_pkg.UUID
    display_name: str
    namespace: Optional[str]
    bot_url: Optional[str]
    created_at: datetime
    updated_at: datetime


class OrganizationUpdate(SQLModel):
    """Patchable organization fields."""
    display_name: Optional[str]
    namespace: Optional[str]
    bot_url: Optional[str]
148
+
149
+
150
# ===============
# User (customer)
# ===============
class User(BaseModel, table=True):
    """End user talking to the bot through a channel (e.g. Telegram)."""

    id: Optional[int] = Field(default=None, primary_key=True)
    # Channel-specific handle (e.g. Telegram username)
    identifier: str = Field(default=None, unique=True, index=True)
    identifier_type: Optional[CHANNEL_TYPE] = Field(default=None)
    uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4)
    first_name: Optional[str] = Field(default=None)
    last_name: Optional[str] = Field(default=None)
    email: Optional[str] = Field(default=None)
    phone: Optional[str] = Field(default=None)
    dob: Optional[datetime] = Field(default=None)
    device_fingerprint: Optional[str] = Field(default=None)
    created_at: Optional[datetime] = Field(default_factory=datetime.now)
    updated_at: Optional[datetime] = Field(default_factory=datetime.now)

    # -------------
    # Relationships
    # -------------
    chat_sessions: Optional[List["ChatSession"]] = Relationship(back_populates="user")

    @property
    def chat_session_count(self) -> int:
        return len(self.chat_sessions)

    __table_args__ = (
        UniqueConstraint("identifier", "identifier_type", name="unq_id_idtype"),
    )

    def __repr__(self):
        # Fixed: the previous repr referenced self.project_id, but User has no
        # project_id field, so calling repr() raised AttributeError.
        return f"<User id={self.id} uuid={self.uuid} identifier={self.identifier} device_fingerprint={self.device_fingerprint}>"
182
+
183
+
184
class UserCreate(SQLModel):
    """Request payload for creating a user."""
    identifier: str
    identifier_type: CHANNEL_TYPE
    device_fingerprint: Optional[str]
    first_name: Optional[str]
    last_name: Optional[str]
    email: Optional[str]
    phone: Optional[str]
    dob: Optional[datetime]


class UserReadList(SQLModel):
    """User representation returned by list endpoints."""
    id: int
    identifier: Optional[str]
    identifier_type: Optional[CHANNEL_TYPE]
    uuid: uuid_pkg.UUID
    device_fingerprint: Optional[str]
    first_name: Optional[str]
    last_name: Optional[str]
    email: Optional[str]
    phone: Optional[str]
    dob: Optional[datetime]
    chat_session_count: int
    created_at: datetime
    updated_at: datetime


class UserUpdate(SQLModel):
    """Patchable user fields.

    Fixed: ``device_fingerprint`` was declared twice; the duplicate line
    is removed (it was harmless at runtime but misleading).
    """
    device_fingerprint: Optional[str]
    first_name: Optional[str]
    last_name: Optional[str]
    email: Optional[str]
    phone: Optional[str]
    dob: Optional[datetime]
219
+
220
+
221
# =======
# Project
# =======
class Project(BaseModel, table=True):
    """A product/initiative under an organization; owns documents."""

    id: Optional[int] = Field(default=None, primary_key=True)
    uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4)
    organization_id: int = Field(default=None, foreign_key="organization.id")
    display_name: str = Field(default="📝 Untitled Project")
    status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value)
    created_at: Optional[datetime] = Field(default_factory=datetime.now)
    updated_at: Optional[datetime] = Field(default_factory=datetime.now)

    # -------------
    # Relationships
    # -------------
    organization: Optional["Organization"] = Relationship(back_populates="projects")
    documents: Optional[List["Document"]] = Relationship(back_populates="project")
    chat_sessions: Optional[List["ChatSession"]] = Relationship(
        back_populates="project"
    )

    @property
    def document_count(self) -> int:
        return len(self.documents)

    def __repr__(self):
        # Fixed: the previous repr printed the uuid twice, the second time
        # mislabeled as "project_id"; show the owning organization_id instead.
        return f"<Project id={self.id} name={self.display_name} uuid={self.uuid} organization_id={self.organization_id}>"
248
+
249
+
250
class ProjectCreate(SQLModel):
    """Request payload for creating a project."""
    display_name: Optional[str]


class ProjectReadListOrganization(SQLModel):
    """Compact organization view embedded in project listings."""
    uuid: uuid_pkg.UUID
    display_name: str
    namespace: Optional[str]
    document_count: int


class ProjectUpdate(SQLModel):
    """Patchable project fields."""
    display_name: Optional[str]
    status: Optional[ENTITY_STATUS]
264
+
265
+
266
# =========
# Documents
# =========
class Document(BaseModel, table=True):
    """An uploaded source document, chunked into Nodes for retrieval."""

    id: Optional[int] = Field(default=None, primary_key=True)
    uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4)
    organization_id: int = Field(default=None, foreign_key="organization.id")
    project_id: int = Field(default=None, foreign_key="project.id")
    display_name: str = Field(default="Untitled Document 😊")
    url: str = Field(default="")
    # Raw file contents
    data: Optional[bytes] = Field(default=None)
    # Content hash used to detect duplicate uploads
    hash: str = Field(default=None)
    version: Optional[int] = Field(default=1)
    status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value)
    created_at: Optional[datetime] = Field(default_factory=datetime.now)
    updated_at: Optional[datetime] = Field(default_factory=datetime.now)

    # -------------
    # Relationships
    # -------------
    nodes: Optional[List["Node"]] = Relationship(back_populates="document")
    organization: Optional["Organization"] = Relationship(back_populates="documents")
    project: Optional["Project"] = Relationship(back_populates="documents")

    @property
    def node_count(self) -> int:
        # Number of chunks this document was split into
        return len(self.nodes)

    # NOTE(review): constraint pairs uuid (already unique) with hash —
    # presumably meant to dedupe documents; confirm intended columns.
    __table_args__ = (UniqueConstraint("uuid", "hash", name="unq_org_document"),)

    def __repr__(self):
        return f"<Document id={self.id} name={self.display_name} uuid={self.uuid}>"
298
+
299
+
300
class ProjectRead(SQLModel):
    """Full project view, including its organization and documents."""
    id: int
    uuid: uuid_pkg.UUID
    organization: Organization
    document_count: int
    documents: Optional[List[Document]] = None
    display_name: str
    created_at: datetime
    updated_at: datetime


class DocumentCreate(SQLModel):
    """Internal payload used when registering an uploaded document."""
    project: Project
    display_name: Optional[str]
    url: Optional[str]
    version: Optional[str]
    data: Optional[bytes]
    hash: Optional[str]


class DocumentUpdate(SQLModel):
    """Patchable document fields (status only)."""
    status: Optional[ENTITY_STATUS]
322
+
323
+
324
# ==============
# Document Nodes
# ==============
class Node(BaseModel, table=True):
    """A chunk of a Document with its embedding vector and metadata."""

    class Config:
        # Needed so pydantic accepts the pgvector Vector column type
        arbitrary_types_allowed = True

    id: Optional[int] = Field(default=None, primary_key=True)
    document_id: int = Field(default=None, foreign_key="document.id")
    uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4)
    # Embedding vector of fixed dimensionality VECTOR_EMBEDDINGS_COUNT
    embeddings: Optional[List[float]] = Field(
        sa_column=Column(Vector(VECTOR_EMBEDDINGS_COUNT))
    )
    # Arbitrary JSON metadata attached to the chunk
    meta: Optional[Dict] = Field(default=None, sa_column=Column(JSONB))
    token_count: Optional[int] = Field(default=None)
    text: str = Field(default=None, nullable=False)
    status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value)
    created_at: Optional[datetime] = Field(default_factory=datetime.now)
    updated_at: Optional[datetime] = Field(default_factory=datetime.now)

    # -------------
    # Relationships
    # -------------
    document: Optional["Document"] = Relationship(back_populates="nodes")

    def __repr__(self):
        return f"<Node id={self.id} uuid={self.uuid} document={self.document_id}>"
351
+
352
+
353
class NodeCreate(SQLModel):
    """Payload used when inserting a document chunk."""
    document: Document
    embeddings: List[float]
    token_count: Optional[int]
    text: str
    status: Optional[ENTITY_STATUS]


class NodeRead(SQLModel):
    """Single node view including its parent document."""
    id: int
    document: Document
    embeddings: Optional[List[float]]
    token_count: Optional[int]
    text: str
    created_at: datetime


class DocumentReadNodeList(SQLModel):
    """Compact document view embedded in node listings."""
    id: int
    uuid: uuid_pkg.UUID
    display_name: str
    node_count: int


class NodeReadResult(SQLModel):
    """Node view returned from similarity-search results."""
    id: int
    token_count: Optional[int]
    text: str
    meta: Optional[Dict]


class ProjectReadListDocumentList(SQLModel):
    """Compact document view embedded in project listings."""
    uuid: uuid_pkg.UUID
    display_name: str
    node_count: Optional[int]


class ProjectReadList(SQLModel):
    """Project view returned by list endpoints."""
    id: int
    # organization: ProjectReadListOrganization
    documents: Optional[List[DocumentReadNodeList]]
    document_count: int
    uuid: uuid_pkg.UUID
    display_name: str
    created_at: datetime
    updated_at: datetime


class NodeReadList(SQLModel):
    """Node view returned by list endpoints."""
    id: int
    document: DocumentReadNodeList
    embeddings: Optional[List[float]]
    token_count: Optional[int]
    text: str
    created_at: datetime


class NodeUpdate(SQLModel):
    """Patchable node fields (status only)."""
    status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value)


class NodeReadListDocumentRead(SQLModel):
    """Compact node view embedded in document reads."""
    uuid: uuid_pkg.UUID
    token_count: Optional[int]
    created_at: datetime


class DocumentReadList(SQLModel):
    """Document view returned by list endpoints."""
    id: int
    uuid: uuid_pkg.UUID
    display_name: str
    version: int
    nodes: Optional[List[NodeReadListDocumentRead]] = None
    node_count: int
    hash: str
    created_at: datetime
    updated_at: datetime
430
+
431
+
432
# ============
# Chat Session
# ============
class ChatSession(BaseModel, table=True):
    """One user query + LLM response exchanged over a channel."""

    class Config:
        # Needed so pydantic accepts the pgvector Vector column type
        arbitrary_types_allowed = True

    id: Optional[int] = Field(default=None, primary_key=True)
    # Conversation identifier shared by related exchanges
    session_id: Optional[uuid_pkg.UUID] = Field(
        index=True, default_factory=uuid_pkg.uuid4
    )
    user_id: int = Field(default=None, foreign_key="user.id")
    project_id: int = Field(default=None, foreign_key="project.id")
    channel: CHANNEL_TYPE = Field(default=CHANNEL_TYPE.TELEGRAM)
    user_message: str = Field(default=None)
    token_count: Optional[int] = Field(default=None)
    embeddings: Optional[List[float]] = Field(
        sa_column=Column(Vector(VECTOR_EMBEDDINGS_COUNT))
    )
    response: Optional[str] = Field(default=None)
    meta: Optional[Dict] = Field(default=None, sa_column=Column(JSONB))
    created_at: datetime = Field(default_factory=datetime.now)

    # -------------
    # Relationships
    # -------------
    user: Optional["User"] = Relationship(back_populates="chat_sessions")
    project: Optional["Project"] = Relationship(back_populates="chat_sessions")

    def __repr__(self):
        # Fixed: the previous repr referenced self.uuid, but this model has
        # no uuid field (its identifier is session_id) → AttributeError.
        return f"<ChatSession id={self.id} session_id={self.session_id} project_id={self.project_id} user_id={self.user_id} message={self.user_message}>"
463
+
464
+
465
class ChatSessionCreatePost(SQLModel):
    """Incoming chat request body (defaults double as API-doc examples)."""
    project_id: Optional[str] = ""
    organization_id: Optional[str] = "pepe"
    channel: Optional[CHANNEL_TYPE] = CHANNEL_TYPE.TELEGRAM
    query: Optional[str] = "What is the weather like in London right now?"
    identifier: Optional[str] = "@username"
    distance_strategy: Optional[str] = LLM_DEFAULT_DISTANCE_STRATEGY
    max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS
    node_limit: Optional[int] = LLM_MIN_NODE_LIMIT
    model: Optional[str] = LLM_MODELS.GPT_35_TURBO
    session_id: Optional[str] = ""


class ChatSessionCreate(SQLModel):
    """Internal payload used when persisting a chat exchange."""
    channel: CHANNEL_TYPE
    token_count: Optional[int]
    user_message: str
    embeddings: List[float]
    response: Optional[str]


class ChatSessionRead(SQLModel):
    """Full chat-session view, including user and project."""
    id: int
    user: User
    project: Optional[ProjectReadListDocumentList]
    token_count: Optional[int]
    channel: CHANNEL_TYPE
    user_message: str
    embeddings: List[float]
    response: Optional[str]
    meta: Optional[dict]
    created_at: datetime = Field(default_factory=datetime.now)


class ChatSessionResponse(SQLModel):
    """Response body returned to chat clients."""
    meta: Optional[dict]
    response: Optional[str]
    user_message: Optional[str]


class ProjectReadChatSessionRead(SQLModel):
    """Compact chat-session view embedded in project reads."""
    id: int
    token_count: Optional[int]
    channel: CHANNEL_TYPE
    created_at: datetime = Field(default_factory=datetime.now)


class ChatSessionReadUserRead(SQLModel):
    """Compact chat-session view embedded in user reads."""
    id: int
    project: Optional[ProjectReadListDocumentList]
    token_count: Optional[int]
    channel: CHANNEL_TYPE
    user_message: str
    response: Optional[str]
    created_at: datetime = Field(default_factory=datetime.now)


class UserRead(SQLModel):
    """Full user view, including chat-session history."""
    id: int
    identifier: Optional[str]
    identifier_type: Optional[CHANNEL_TYPE]
    uuid: uuid_pkg.UUID
    language: Optional[str]
    device_fingerprint: Optional[str]
    first_name: Optional[str]
    last_name: Optional[str]
    email: Optional[str]
    phone: Optional[str]
    dob: Optional[datetime]
    chat_session_count: int
    chat_sessions: Optional[List[ChatSessionReadUserRead]]
    created_at: datetime
    updated_at: datetime


class DocumentReadProjectRead(SQLModel):
    """Compact project view embedded in document reads."""
    uuid: uuid_pkg.UUID
    display_name: str
    namespace: Optional[str]
    document_count: int


class DocumentRead(SQLModel):
    """Full document view, including project, organization and raw data."""
    id: int
    uuid: uuid_pkg.UUID
    project: DocumentReadProjectRead
    organization: OrganizationRead
    display_name: str
    node_count: int
    url: Optional[str]
    version: int
    data: bytes
    hash: str
    created_at: datetime
    updated_at: datetime


class WebhookCreate(SQLModel):
    """Inbound channel webhook payload (e.g. from Telegram)."""
    update_id: str
    message: Dict[str, Any]


class WebhookResponse(SQLModel):
    """Webhook payload forwarded back to the channel."""
    update_id: str
    message: Dict[str, Any]
570
+
571
+
572
# ==================
# Database functions
# ==================
def get_engine(dsn: str = SU_DSN):
    """Create a SQLAlchemy engine for the given DSN (defaults to the superuser DSN)."""
    return create_engine(dsn)


def get_session():
    """FastAPI dependency: yield a session and close it when the request ends."""
    with Session(get_engine()) as session:
        yield session


def create_db():
    """Enable pgvector, create all tables, grant app-user perms and add indexes."""
    logger.info("...Enabling pgvector and creating database tables")
    enable_vector()
    BaseModel.metadata.create_all(get_engine(dsn=SU_DSN))
    create_user_permissions()
    create_vector_index()


def create_user_permissions():
    """Grant CRUD on every public table to the application user DB_USER.

    (Fixed: the session is now closed via a context manager; the old code
    created sessions it sometimes left open.)
    """
    query = f"GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO {DB_USER};"
    with Session(get_engine(dsn=SU_DSN)) as session:
        session.execute(query)
        session.commit()


def drop_db():
    """Drop every table defined on the models' metadata."""
    BaseModel.metadata.drop_all(get_engine(dsn=SU_DSN))


def create_vector_index():
    """Add an embeddings index per distance strategy when PGVECTOR_ADD_INDEX is set."""
    if PGVECTOR_ADD_INDEX is True:
        with Session(get_engine(dsn=SU_DSN)) as session:
            for strategy in DISTANCE_STRATEGIES:
                # strategy[3] holds the CREATE INDEX statement for this metric
                session.execute(strategy[3])
            session.commit()


def enable_vector():
    """Enable the pgvector extension and install the match_node_* functions."""
    with Session(get_engine(dsn=SU_DSN)) as session:
        session.execute("CREATE EXTENSION IF NOT EXISTS vector;")
        session.commit()
        add_vector_distance_fn(session)


def add_vector_distance_fn(session: Session):
    """Create one match_node_<strategy> plpgsql similarity function per strategy.

    The caller owns ``session``; this function no longer closes it (the old
    code closed a session it did not create).
    """
    for strategy in DISTANCE_STRATEGIES:
        strategy_name = strategy[1]
        strategy_distance_str = strategy[2]

        query = f"""create or replace function match_node_{strategy_name} (
        query_embeddings vector({VECTOR_EMBEDDINGS_COUNT}),
        match_threshold float,
        match_count int
        ) returns table (
        uuid uuid,
        text varchar,
        similarity float
        )
        language plpgsql
        as $$
        begin
        return query
        select
            node.uuid,
            node.text,
            1 - (node.embeddings {strategy_distance_str} query_embeddings) as similarity
        from node
        where 1 - (node.embeddings {strategy_distance_str} query_embeddings) > match_threshold
        order by similarity desc
        limit match_count;
        end;
        $$;"""

        session.execute(query)
        session.commit()


if __name__ == "__main__":
    create_db()
app/api/ngrok.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import yaml
4
+ import requests
5
+ import logging
6
+ import httpx
7
+ import asyncio
8
+ from time import sleep
9
+ from fastapi import FastAPI, Depends, HTTPException
10
+
11
+ # ---------
12
+ CREDENTIALS_READY = False
13
+ RETRY_LIMIT = 10
14
+ RETRY_INTERVAL = 15
15
+
16
+ # ----------------
17
+ # Environment vars
18
+ # ----------------
19
+ NGROK_HOST = os.getenv('NGROK_HOST', 'ngrok')
20
+ NGROK_PORT = os.getenv('NGROK_PORT', 4040)
21
+ NGROK_INTERNAL_WEBHOOK_HOST = os.getenv('NGROK_INTERNAL_WEBHOOK_HOST', 'rasa-core')
22
+ NGROK_INTERNAL_WEBHOOK_PORT = os.getenv('NGROK_INTERNAL_WEBHOOK_PORT', 5005)
23
+ NGROK_API_URL = f'http://{NGROK_HOST}:{NGROK_PORT}'
24
+ TELEGRAM_ACCESS_TOKEN = os.getenv('TELEGRAM_ACCESS_TOKEN', None)
25
+ TELEGRAM_BOTNAME = os.getenv('TELEGRAM_BOTNAME', None)
26
+ CREDENTIALS_PATH = os.getenv('CREDENTIALS_PATH', '/app/rasa/credentials.yml')
27
+
28
+ # -------
29
+ # Logging
30
+ # -------
31
+ logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
32
+ logger = logging.getLogger(__name__)
33
+ logger.debug(f'NGROK_HOST: {NGROK_HOST}:{NGROK_PORT}\nNGROK_API_URL: {NGROK_API_URL}\nNGROK_INTERNAL_WEBHOOK_HOST: {NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}')
34
+
35
+
36
async def wait_for_ngrok_api():
    """Poll the ngrok API until it answers; return True once it is reachable."""
    while True:
        try:
            async with httpx.AsyncClient() as client:
                res = await client.get(f"{NGROK_API_URL}/api/tunnels")
                res.raise_for_status()
                logger.debug('ngrok API is online.')
                return True
        except httpx.RequestError:
            # Connection-level failure: keep waiting and retry
            logger.debug('ngrok API is offline. Waiting...')
            await asyncio.sleep(RETRY_INTERVAL)
47
+
48
+
49
async def get_active_tunnels():
    """Return the list of tunnels currently registered with the ngrok API."""
    res = requests.get(f'{NGROK_API_URL}/api/tunnels')
    res.raise_for_status()
    return res.json()['tunnels']


async def stop_tunnel(tunnel):
    """Delete a single ngrok tunnel, addressed by its name."""
    tunnel_id = tunnel['name']
    res = requests.delete(f'{NGROK_API_URL}/api/tunnels/{tunnel_id}')
    res.raise_for_status()


async def stop_all_tunnels():
    """Stop every active tunnel; log and return when there are none."""
    active_tunnels = await get_active_tunnels()
    if not active_tunnels:
        logger.debug('No active tunnels found.')
        return
    for tunnel in active_tunnels:
        logger.debug(f"Stopping tunnel: {tunnel['name']} ({tunnel['public_url']})")
        await stop_tunnel(tunnel)
70
+
71
+
72
async def create_tunnel():
    """Open an http tunnel to the Rasa webhook host; return its public URL."""
    payload = {
        'addr': f'{NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}',
        'proto': 'http',
        'name': NGROK_INTERNAL_WEBHOOK_HOST,
    }
    res = requests.post(f'{NGROK_API_URL}/api/tunnels', json=payload)
    res.raise_for_status()
    return res.json()['public_url']


# ----------------------
# Fetch ngrok public URL
# ----------------------
async def get_ngrok_url():
    """Convenience wrapper: create a tunnel and return its public URL."""
    return await create_tunnel()
88
+
89
+
90
# ----------------------------
# Update Rasa credentials file
# ----------------------------
async def update_credentials_file(ngrok_url):
    """Point the Rasa telegram credentials at the current ngrok public URL.

    Returns:
        True on success, False on any failure. (Fixed: the old code
        implicitly returned None on error, which confused truthiness checks.)
    """
    # NOTE(review): CREDENTIALS_READY is declared global but never assigned
    # anywhere visible — confirm whether it should be set True on success.
    global CREDENTIALS_READY
    try:
        with open(CREDENTIALS_PATH, 'r') as file:
            credentials = yaml.safe_load(file)

        credentials['telegram']['webhook_url'] = f"{ngrok_url}/webhooks/telegram/webhook"
        credentials['telegram']['access_token'] = TELEGRAM_ACCESS_TOKEN
        credentials['telegram']['verify'] = TELEGRAM_BOTNAME

        with open(CREDENTIALS_PATH, 'w') as file:
            yaml.safe_dump(credentials, file)

        return True
    except Exception as e:
        logger.warning(f'Error updating {CREDENTIALS_PATH}: {e}')
        return False
109
+
110
+
111
# ---------------------
# Endpoint dependencies
# ---------------------
async def check_endpoint_availability():
    """Gate endpoints until the credentials file has been written."""
    if CREDENTIALS_READY:
        return True
    raise HTTPException(status_code=403, detail="Endpoint not available yet")
app/api/requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SQLAlchemy
2
+ uvicorn[standard]
3
+ pgvector
4
+ python-multipart
5
+ psycopg2-binary
6
+ python-dotenv
7
+ fastapi[all]
8
+ langchain==0.0.157
9
+ tiktoken
10
+ aiofiles
11
+ sqlmodel
12
+ openai
app/api/seed.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ----------------
# Database imports
# ----------------
from helpers import (
    create_org_by_org_or_uuid,
    create_project_by_org,
    create_document_by_file_path
)
from config import (
    FILE_UPLOAD_PATH,
    logger
)
from util import (
    get_file_hash
)
import os

# --------------------
# Create organizations
# --------------------
# Seed data: each organization owns projects, and each project may list
# markdown training documents that live under data/training_data.
organizations = [
    {
        'display_name': 'Pepe Corp.',
        'namespace': 'pepe',
        'projects': [
            {
                'display_name': 'Pepetamine',
                'docs': [
                    'project-pepetamine.md'
                ]
            },
            {
                'display_name': 'Frogonil',
                'docs': [
                    'project-frogonil.md'
                ]
            },
            {
                'display_name': 'Kekzal',
                'docs': [
                    'project-kekzal.md'
                ]
            },
            {
                'display_name': 'Memetrex',
                'docs': [
                    'project-memetrex.md'
                ]
            },
            {
                'display_name': 'PepeTrak',
                'docs': [
                    'project-pepetrak.md'
                ]
            },
            {
                'display_name': 'MemeGen',
                'docs': [
                    'project-memegen.md'
                ]
            },
            {
                'display_name': 'Neuro-kek',
                'docs': [
                    'project-neurokek.md'
                ]
            },
            {
                'display_name': 'Pepe Corp. (company)',
                'docs': [
                    'org-about_the_company.md',
                    'org-board_of_directors.md',
                    'org-company_story.md',
                    'org-corporate_philosophy.md',
                    'org-customer_support.md',
                    'org-earnings_fy2023.md',
                    'org-management_team.md'
                ]
            }
        ]
    },
    {
        'display_name': 'Umbrella Corp',
        'namespace': 'acme',
        'projects': [
            {'display_name': 'T-Virus'},
            {'display_name': 'G-Virus'},
            {'display_name': 'Umbrella Corp. (company)'}
        ]
    },
    {
        'display_name': 'Cyberdine Systems',
        'namespace': 'cyberdine',
        'projects': [
            {'display_name': 'Skynet'},
            {'display_name': 'Cyberdine Systems (company)'}
        ]
    },
    {
        'display_name': 'Bluth Companies',
        'namespace': 'bluth',
        'projects': [
            {'display_name': 'Bluth Company (company)'}
        ]
    },
    {
        'display_name': 'Evil Corp',
        'namespace': 'evil',
        'projects': [
            {'display_name': 'E-Coin'},
            {'display_name': 'E-Corp Power'},
            {'display_name': 'Bank of E Network'},
            {'display_name': 'E Corp Labs'},
            {'display_name': 'Evil Corp (company)'}
        ]
    }
]

training_data_path = os.path.join(os.path.dirname(__file__), f'{FILE_UPLOAD_PATH}/training_data')

for org in organizations:

    org_obj = create_org_by_org_or_uuid(
        display_name=org['display_name'],
        namespace=org['namespace']
    )
    logger.debug(f'🏠 Created organization: {org_obj.display_name}')

    if 'projects' not in org:
        continue

    for project in org['projects']:
        project['organization'] = org_obj

        # NOTE(review): the helper kwarg is named organization_id but receives
        # the organization object — confirm against helpers.py.
        project_obj = create_project_by_org(
            organization_id=org_obj,
            display_name=project['display_name']
        )
        logger.debug(f'🗂️ Created project: {project_obj.display_name}')

        project_uuid = str(project_obj.uuid)
        org_uuid = str(org_obj.uuid)

        # Ensure the upload directory exists. Fixed: os.mkdir failed with
        # FileNotFoundError when the parent org directory was missing;
        # makedirs creates the whole chain and tolerates re-runs.
        os.makedirs(os.path.join(FILE_UPLOAD_PATH, org_uuid, project_uuid), exist_ok=True)

        if 'docs' not in project:
            continue

        for doc in project['docs']:
            file_path = os.path.join(training_data_path, doc)

            # check if file exists
            if os.path.isfile(file_path):
                file_hash = get_file_hash(file_path)
                create_document_by_file_path(
                    organization=org_obj,
                    project=project_obj,
                    file_path=file_path,
                    file_hash=file_hash
                )
                logger.info(f'  ✅ Created document: {doc}')
            else:
                logger.error(f'  ❌ Document not found: {doc}')
app/api/static/img/rasagpt-icon-200x200.png ADDED
app/api/static/img/rasagpt-logo-1.png ADDED
app/api/static/img/rasagpt-logo-2.png ADDED
app/api/util.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import UploadFile
2
+ from functools import partial
3
+ from sqlmodel import Session
4
+ from hashlib import sha256
5
+ from uuid import UUID
6
+ import aiofiles
7
+ import json
8
+ import re
9
+ from config import (
10
+ logger
11
+ )
12
+
13
# Two-pass CamelCase -> snake_case splitter: pass 1 separates a capitalized
# word that follows a letter; pass 2 separates a lower/digit + upper boundary.
_snake_1 = partial(re.compile(r'(.)((?<![^A-Za-z])[A-Z][a-z]+)').sub, r'\1_\2')
_snake_2 = partial(re.compile(r'([a-z0-9])([A-Z])').sub, r'\1_\2')


def snake_case(string: str) -> str:
    """Convert a CamelCase identifier to snake_case (lower-cased via casefold)."""
    partially_split = _snake_1(string)
    return _snake_2(partially_split).casefold()
19
+
20
+
21
def is_uuid(uuid: str):
    """Return a regex match when the value looks like a v4 UUID, else None.

    Accepts either a str or a uuid.UUID instance; dashes are optional.
    """
    candidate = str(uuid) if isinstance(uuid, UUID) else uuid
    pattern = r"^[0-9a-f]{8}-?[0-9a-f]{4}-?4[0-9a-f]{3}-?[89ab][0-9a-f]{3}-?[0-9a-f]{12}$"
    return re.match(pattern, candidate)
24
+
25
+
26
async def save_file(file: UploadFile, file_path: str):
    """Read an uploaded file fully and write it to disk asynchronously."""
    contents = await file.read()
    async with aiofiles.open(file_path, 'wb') as f:
        await f.write(contents)
29
+
30
+
31
def get_sha256(contents: bytes):
    """Hex SHA-256 digest of the given bytes."""
    return sha256(contents).hexdigest()


def get_file_hash(
    file_path: str,
):
    """Hex SHA-256 digest of a file's entire contents."""
    with open(file_path, 'rb') as f:
        contents = f.read()

    return sha256(contents).hexdigest()
42
+
43
+
44
# -------------------
# Clean up LLM output
# -------------------
def sanitize_output(
    str_output: str
):
    """Validate and normalize the raw LLM response.

    Strips newlines and a stray leading '?', verifies the result is valid
    JSON containing the required 'message', 'tags' and 'is_escalate'
    fields, and returns the cleaned JSON string.

    Raises:
        ValueError: when the response is not JSON or lacks a required field.
    """
    # Let's sanitize the JSON
    res = str_output.replace("\n", '')

    # If the first character is "?", remove it. Ran into this issue for some reason.
    # (Fixed: guard against an empty response before indexing.)
    if res and res[0] == '?':
        res = res[1:]

    # check if response is valid json
    try:
        parsed = json.loads(res)
    except json.JSONDecodeError:
        raise ValueError(f'LLM response is not valid JSON: {res}')

    # Fixed: the old code did a substring search on the raw string, which
    # could both false-positive (field name inside a value) and miss the
    # real intent; check the keys of the parsed object instead.
    if not isinstance(parsed, dict) or any(
        key not in parsed for key in ('message', 'tags', 'is_escalate')
    ):
        raise ValueError(f'LLM response is missing required fields: {res}')

    logger.debug(f'Output: {res}')
    return res
68
+
69
# ------------------
# Clean up LLM input
# ------------------
def sanitize_input(
    str_input: str
):
    """Strip single quotes from user input (they break the output JSON)."""
    cleaned = str_input.replace("'", "")
    logger.debug(f'Input: {cleaned}')
    return cleaned
80
+
app/api/wait-for-it.sh ADDED
File without changes
app/db/Dockerfile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Postgres image with the pgvector extension preinstalled (vector embeddings)
FROM ankane/pgvector:latest

# Copy in the load-extensions script
# Scripts under /docker-entrypoint-initdb.d/ run once, on first DB initialization
COPY ./create_db.sh /docker-entrypoint-initdb.d/
RUN chmod +x /docker-entrypoint-initdb.d/create_db.sh
app/db/create_db.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # This is run one-time during the first time Postgres is initialized
4
+
5
+ echo "Creating database ${DB_NAME} and user..."
6
+
7
+ # Create the user and database "api"
8
+
9
+ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL
10
+ CREATE USER ${DB_USER} WITH ENCRYPTED PASSWORD '${DB_PASSWORD}';
11
+ CREATE DATABASE ${DB_NAME} OWNER ${DB_USER};
12
+ GRANT ALL PRIVILEGES ON DATABASE ${DB_NAME} TO ${DB_USER};
13
+ EOSQL
app/rasa-credentials/.dockerignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
app/rasa-credentials/Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.8-slim

WORKDIR /app/rasa-credentials
COPY . .

# Add CURL (used for container healthchecks).
# Chaining update+install in one RUN avoids stale-apt-cache layer bugs and
# removing the package lists keeps the image smaller.
RUN apt-get -y update \
    && apt-get -y install curl \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir -r requirements.txt
# Expose the port
EXPOSE 8889

ENTRYPOINT ["uvicorn", "main:app", "--host", "rasa-credentials", "--port", "8889", "--reload"]
app/rasa-credentials/main.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import (
2
+ HTTPException,
3
+ FastAPI,
4
+ Depends,
5
+ )
6
+ import requests
7
+ import logging
8
+ import asyncio
9
+ import httpx
10
+ import yaml
11
+ import sys
12
+ import os
13
+
14
# ---------
# Constants
# ---------
# Flipped to True once credentials.yml has been written; gates the endpoints below
CREDENTIALS_READY = False
RETRY_LIMIT = 10        # max attempts when polling for an ngrok tunnel
RETRY_INTERVAL = 15     # seconds between polls

# ----------------
# Environment vars
# ----------------
NGROK_HOST = os.getenv("NGROK_HOST", "ngrok")
# NOTE(review): default is the int 4040 while os.getenv returns a str when the
# var is set; only ever used via f-string interpolation, so both forms work.
NGROK_PORT = os.getenv("NGROK_PORT", 4040)
NGROK_INTERNAL_WEBHOOK_HOST = os.getenv("NGROK_INTERNAL_WEBHOOK_HOST", "rasa-core")
NGROK_INTERNAL_WEBHOOK_PORT = os.getenv("NGROK_INTERNAL_WEBHOOK_PORT", 5005)
# Base URL of the local ngrok agent's HTTP API
NGROK_API_URL = f"http://{NGROK_HOST}:{NGROK_PORT}"
TELEGRAM_ACCESS_TOKEN = os.getenv("TELEGRAM_ACCESS_TOKEN", None)
TELEGRAM_BOTNAME = os.getenv("TELEGRAM_BOTNAME", None)
# Rasa credentials file this service rewrites at startup
CREDENTIALS_PATH = os.getenv("CREDENTIALS_PATH", "/app/rasa/credentials.yml")

# -------
# Logging
# -------
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logger = logging.getLogger(__name__)
logger.debug(
    f"NGROK_HOST: {NGROK_HOST}:{NGROK_PORT}\nNGROK_API_URL: {NGROK_API_URL}\nNGROK_INTERNAL_WEBHOOK_HOST: {NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}"
)
41
+
42
+
43
async def wait_for_ngrok_api():
    """Poll the local ngrok agent API until it answers; returns True.

    No timeout: loops forever, so startup time is bounded only by the
    container orchestration around this service.
    """
    while True:
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(f"{NGROK_API_URL}/api/tunnels")
                response.raise_for_status()
                logger.debug("ngrok API is online.")
                return True
        except httpx.RequestError:
            # Connection-level failure: agent not up yet — wait and retry.
            # NOTE(review): a non-2xx status raises httpx.HTTPStatusError,
            # which is NOT caught here and would propagate — confirm intended.
            logger.debug("ngrok API is offline. Waiting...")
            await asyncio.sleep(RETRY_INTERVAL)
54
+
55
+
56
async def get_active_tunnels():
    """Return the list of tunnels known to the local ngrok agent.

    Returns an empty list when the agent answers with an error status or
    cannot be reached at all, so callers can simply retry.

    NOTE: uses the blocking `requests` client inside an async function;
    tolerable here because it only runs during service startup.
    """
    try:
        response = requests.get(f"{NGROK_API_URL}/api/tunnels")
        response.raise_for_status()
        tunnels = response.json()["tunnels"]
    except requests.exceptions.RequestException:
        # Broader than the original HTTPError-only catch: a refused
        # connection or timeout now also degrades to "no tunnels"
        # instead of crashing the startup task.
        tunnels = []
    return tunnels
64
+
65
+
66
async def stop_tunnel(tunnel):
    """Delete one tunnel (addressed by its name) via the ngrok agent API."""
    name = tunnel["name"]
    resp = requests.delete(f"{NGROK_API_URL}/api/tunnels/{name}")
    resp.raise_for_status()
70
+
71
+
72
async def stop_all_tunnels():
    """Tear down every tunnel currently registered with the ngrok agent."""
    active_tunnels = await get_active_tunnels()
    if not active_tunnels:
        logger.debug("No active tunnels found.")
        return
    for tunnel in active_tunnels:
        logger.debug(f"Stopping tunnel: {tunnel['name']} ({tunnel['public_url']})")
        await stop_tunnel(tunnel)
80
+
81
+
82
async def get_tunnel(retry=0):
    """Poll the ngrok agent until a tunnel exists; return its public URL.

    Iterative rewrite of the original recursive retry: the same number of
    fetch/sleep cycles happen before the retry limit triggers.
    """
    attempts = retry
    while attempts <= RETRY_LIMIT:
        active_tunnels = await get_active_tunnels()
        if active_tunnels:
            return active_tunnels[0]["public_url"]
        logger.debug(f"No active tunnels found. Trying again in {RETRY_INTERVAL}s..")
        await asyncio.sleep(RETRY_INTERVAL)
        attempts += 1
    raise Exception(
        f"Could not create ngrok tunnel. Exceed retry limit of {RETRY_LIMIT} attempts."
    )
96
+
97
+
98
async def create_tunnel():
    """Ask the ngrok agent to open an HTTP tunnel to the Rasa core webhook.

    Returns the tunnel's public URL on success, or False if the agent
    rejects the request (note the mixed str/bool return — callers must
    truth-test the result).

    NOTE: blocking `requests` call inside an async function — tolerable
    since this only runs once at startup.
    """
    response = requests.post(
        f"{NGROK_API_URL}/api/tunnels",
        json={
            "addr": f"{NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}",
            "proto": "http",
            "name": NGROK_INTERNAL_WEBHOOK_HOST,
        },
    )
    try:
        response.raise_for_status()
        return response.json()["public_url"]
    except requests.exceptions.HTTPError as e:
        logger.warning(f"Error creating ngrok tunnel: {e}")
        return False
113
+
114
+
115
# ----------------------------
# Update Rasa credentials file
# ----------------------------
async def update_credentials_file(ngrok_url):
    """Write the Telegram webhook URL, access token and botname into
    the Rasa credentials YAML at CREDENTIALS_PATH.

    On success flips the module-level CREDENTIALS_READY flag, which
    unblocks the health-check endpoint below. Any failure is fatal:
    the process exits so orchestration can restart the service.
    """
    global CREDENTIALS_READY
    try:
        with open(CREDENTIALS_PATH, "r") as file:
            credentials = yaml.safe_load(file)

        # Point Rasa's custom Telegram channel at the public ngrok URL
        credentials["custom_telegram.CustomTelegramInput"][
            "webhook_url"
        ] = f"{ngrok_url}/webhooks/telegram/webhook"
        credentials["custom_telegram.CustomTelegramInput"][
            "access_token"
        ] = TELEGRAM_ACCESS_TOKEN
        credentials["custom_telegram.CustomTelegramInput"]["verify"] = TELEGRAM_BOTNAME

        with open(CREDENTIALS_PATH, "w") as file:
            yaml.safe_dump(credentials, file)

        CREDENTIALS_READY = True
    except Exception as e:
        # Broad catch is deliberate: any YAML/IO error means the bot cannot
        # receive webhooks, so log and terminate the service.
        logger.warning(f"Error updating {CREDENTIALS_PATH}: {e}")
        sys.exit(1)
139
+
140
+
141
# -----------------
# FastAPI endpoints
# -----------------

app = FastAPI()


# -------------
# Startup event
# -------------
@app.on_event("startup")
async def startup_event():
    """On boot, in dev-like environments only: wait for the ngrok agent,
    obtain (or create) a tunnel, and write its public URL into the Rasa
    credentials file."""
    env = os.getenv("ENV", None)
    if env and env.lower() in ["dev", "development", "local"]:
        await wait_for_ngrok_api()
        url = await get_tunnel()
        # NOTE(review): get_tunnel() raises after exhausting retries rather
        # than returning a falsy value, so this fallback only fires if the
        # agent reports a tunnel with an empty public_url — confirm intended.
        if not url:
            logger.debug("No active tunnels found. Creating one...")
            url = await create_tunnel()
        logger.debug(f"Tunnel url: {url}")
        await update_credentials_file(url)
    else:
        logger.debug("Not in dev environment. Skipping.")
164
+
165
+
166
# ---------------------
# Endpoint dependencies
# ---------------------
async def check_endpoint_availability():
    """FastAPI dependency: allow requests only once credentials are written."""
    if CREDENTIALS_READY:
        return True
    raise HTTPException(status_code=403, detail="Endpoint not available yet")
173
+
174
+
175
# ---------------------
# Health check endpoint
# ---------------------
# This endpoint is used by docker-compose to check if the
# container is ready. If it is ready, Rasa core can start
@app.get("/", dependencies=[Depends(check_endpoint_availability)])
async def health_check():
    """Readiness probe: 200 once credentials are written, 403 before."""
    return {"status": "ok"}
app/rasa-credentials/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ rasa-sdk
3
+ requests
4
+ pyyaml
5
+ uvicorn[standard]
6
+ fastapi[all]
7
+ httpx
8
+ asyncio
app/rasa/.dockerignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
app/rasa/actions/Dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# aarch64 (Apple Silicon) community build of Rasa 3.5.2
FROM khalosa/rasa-aarch64:3.5.2

COPY . /app

WORKDIR /app
RUN pip install python-dotenv rasa-sdk requests
EXPOSE 5055

# The base image's entrypoint is `rasa`; this starts the action server in debug mode
CMD ["run", "actions", "--debug"]
app/rasa/actions/__init__.py ADDED
File without changes
app/rasa/actions/actions.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rasa_sdk.executor import CollectingDispatcher
2
+ from typing import Any, Text, Dict, List
3
+ from rasa_sdk import Action, Tracker
4
+ from dotenv import load_dotenv
5
+ from logging import getLogger
6
+ from enum import IntEnum
7
+ import os
8
+
9
logger = getLogger(__name__)

# Load the environment-specific dotenv file (e.g. .env-local) from the repo
# root. `env_file` was previously computed but unused while the same path was
# rebuilt inline; it is now the single source of the filename. The duplicate
# `logger = getLogger(__name__)` assignment further down was removed.
env = os.getenv("ENV", "local")
env_file = f".env-{env}"
load_dotenv(dotenv_path=f"../../{env_file}")


MODEL_NAME = os.getenv("MODEL_NAME")
# Closed set of inbound channel types; IntEnum members are numbered from 1
CHANNEL_TYPE = IntEnum(
    "CHANNEL_TYPE", ["SMS", "TELEGRAM", "WHATSAPP", "EMAIL", "WEBSITE"]
)
22
+
23
+
24
class ActionGPTFallback(Action):
    """Fallback action: relays the LLM-generated reply carried in the incoming
    message's metadata back to the user, annotating escalations and tags."""

    def name(self) -> str:
        # Action name registered in the Rasa domain/rules
        return "action_gpt_fallback"

    def get_channel(self, channel: str) -> CHANNEL_TYPE:
        """Map Rasa's input-channel name to a CHANNEL_TYPE (default WEBSITE)."""
        if channel == "telegram":
            return CHANNEL_TYPE.TELEGRAM
        elif channel == "whatsapp":
            return CHANNEL_TYPE.WHATSAPP
        elif channel == "sms":
            return CHANNEL_TYPE.SMS
        elif channel == "email":
            return CHANNEL_TYPE.EMAIL
        else:
            return CHANNEL_TYPE.WEBSITE

    def run(
        self,
        dispatcher: CollectingDispatcher,
        tracker: Tracker,
        domain: Dict[Text, Any],
    ) -> List[Dict[Text, Any]]:
        """Read response/tags/is_escalate from message metadata, decorate the
        text, utter it, and return no events."""
        # ------------
        # Get metadata
        # ------------
        # NOTE(review): metadata appears to be injected upstream (see
        # custom_telegram.get_metadata); every field may be absent, hence
        # the guarded lookups below.
        data = tracker.latest_message
        metadata = data['metadata'] if data and 'metadata' in data else None
        response = metadata['response'] if metadata and 'response' in metadata else None
        tags = metadata['tags'] if metadata and 'tags' in metadata else None
        is_escalate = (
            metadata['is_escalate'] if metadata and 'is_escalate' in metadata else None
        )

        # Flag conversations the LLM marked for human follow-up
        if is_escalate is True:
            response = f'{response} \n\n ⚠️💁 [ESCALATE TO HUMAN]'

        # Append any topic tags the LLM attached
        if tags is not None:
            response = f'{response} \n\n 🏷️ {",".join(tags)}'

        logger.debug(
            f"""[🤖 ActionGPTFallback]
            data: {data}
            metadata: {metadata}
            response: {response}
            tags: {tags}
            is_escalate: {is_escalate}
            """
        )
        dispatcher.utter_message(text=response)
        return []
app/rasa/config.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Rasa assistant configuration
recipe: default.v1
assistant_id: 20230303-132941-slow-interest
language: en
# Dialogue management: rule-based policy only
policies:
  - name: RulePolicy
app/rasa/credentials.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Channel credentials. The null Telegram fields are filled in at runtime by
# the rasa-credentials service (update_credentials_file in
# app/rasa-credentials/main.py).
custom_telegram.CustomTelegramInput:
  access_token: null
  verify: null
  webhook_url: null
rasa:
  url: http://rasa-core:5005/webhook
rest: null
app/rasa/custom_telegram.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rasa.core.channels.telegram import TelegramInput
2
+ from rasa.shared.utils import common as rasa_common
3
+ from typing import Any, Dict, Optional, Text
4
+ from sanic.request import Request
5
+
6
+
7
+ class CustomTelegramInput(TelegramInput):
8
+
9
+ def get_metadata(self, request: Request) -> Optional[Dict[Text, Any]]:
10
+
11
+ # For whatever reason, Rasa is unable to pass data via 'metadata' so 'meta' works for now
12
+ metadata = request.json.get('message', {}).get('meta')
13
+
14
+ # Debug
15
+ rasa_common.logger.debug(f'[🤖 ActionGPTFallback]\nmetadata: {metadata}')
16
+ return metadata if metadata is not None else None