Spaces:
Sleeping
Sleeping
Commit
·
3fa5f95
1
Parent(s):
ba4a6fd
Rename `username` to `user_id`
Browse files
- app.py +32 -22
- functions.py +63 -55
app.py
CHANGED
|
@@ -20,15 +20,16 @@ app.add_middleware(
|
|
| 20 |
allow_headers=["*"],
|
| 21 |
)
|
| 22 |
|
| 23 |
-
app.include_router(speech_translator_router, prefix="/speech")
|
| 24 |
|
| 25 |
|
| 26 |
@app.post("/signup")
|
| 27 |
-
async def sign_up(email, password):
|
| 28 |
try:
|
| 29 |
res, _ = supabase.auth.sign_up(
|
| 30 |
{"email": email, "password": password, "role": "user"}
|
| 31 |
)
|
|
|
|
| 32 |
response = {
|
| 33 |
"status": "success",
|
| 34 |
"code": 200,
|
|
@@ -56,6 +57,8 @@ async def sign_in(email, password):
|
|
| 56 |
user_id = res.user.id
|
| 57 |
access_token = res.session.access_token
|
| 58 |
refresh_token = res.session.refresh_token
|
|
|
|
|
|
|
| 59 |
store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
|
| 60 |
try:
|
| 61 |
store_id = store_session_check[1][0]["StoreID"]
|
|
@@ -113,10 +116,17 @@ async def set_session_data(access_token, refresh_token):
|
|
| 113 |
|
| 114 |
|
| 115 |
@app.post("/logout")
|
| 116 |
-
async def sign_out():
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
|
|
|
|
|
|
|
| 120 |
|
| 121 |
|
| 122 |
@app.post("/oauth")
|
|
@@ -129,13 +139,13 @@ async def oauth(provider):
|
|
| 129 |
@app.post("/newChatbot")
|
| 130 |
async def newChatbot(chatbotName: str, username: str):
|
| 131 |
currentBotCount = len(listTables(username=username)["output"])
|
| 132 |
-
limit = client.table("ConversAI_UserConfig").select("chatbotLimit").eq("
|
| 133 |
"chatbotLimit"]
|
| 134 |
if currentBotCount >= int(limit):
|
| 135 |
return {
|
| 136 |
"output": "CHATBOT LIMIT EXCEEDED"
|
| 137 |
}
|
| 138 |
-
client.table("ConversAI_ChatbotInfo").insert({"
|
| 139 |
chatbotName = f"convai-{username}-{chatbotName}"
|
| 140 |
return createTable(tablename=chatbotName)
|
| 141 |
|
|
@@ -149,12 +159,12 @@ async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
|
|
| 149 |
text += page.extract_text()
|
| 150 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 151 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 152 |
-
currentCount = df[(df["
|
| 153 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
| 154 |
"tokenLimit"]
|
| 155 |
newCount = currentCount + len(text)
|
| 156 |
if newCount < int(limit):
|
| 157 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
| 158 |
"chatbotname", chatbotname).execute()
|
| 159 |
return addDocuments(text=text, vectorstore=vectorstore)
|
| 160 |
else:
|
|
@@ -174,12 +184,12 @@ async def returnText(pdf: UploadFile = File(...)):
|
|
| 174 |
async def addText(vectorstore: str, text: str):
|
| 175 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 176 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 177 |
-
currentCount = df[(df["
|
| 178 |
newCount = currentCount + len(text)
|
| 179 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
| 180 |
"tokenLimit"]
|
| 181 |
if newCount < int(limit):
|
| 182 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
| 183 |
"chatbotname", chatbotname).execute()
|
| 184 |
return addDocuments(text=text, vectorstore=vectorstore)
|
| 185 |
else:
|
|
@@ -198,13 +208,13 @@ class AddQAPair(BaseModel):
|
|
| 198 |
async def addText(addQaPair: AddQAPair):
|
| 199 |
username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
|
| 200 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 201 |
-
currentCount = df[(df["
|
| 202 |
qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
|
| 203 |
newCount = currentCount + len(qa)
|
| 204 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
| 205 |
"tokenLimit"]
|
| 206 |
if newCount < int(limit):
|
| 207 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
| 208 |
"chatbotname", chatbotname).execute()
|
| 209 |
return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
|
| 210 |
else:
|
|
@@ -222,12 +232,12 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
|
| 222 |
[f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
|
| 223 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 224 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 225 |
-
currentCount = df[(df["
|
| 226 |
newCount = currentCount + len(text)
|
| 227 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
| 228 |
"tokenLimit"]
|
| 229 |
if newCount < int(limit):
|
| 230 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
| 231 |
"chatbotname", chatbotname).execute()
|
| 232 |
return addDocuments(text=text, vectorstore=vectorstore)
|
| 233 |
else:
|
|
@@ -244,7 +254,7 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
|
|
| 244 |
@app.post("/deleteChatbot")
|
| 245 |
async def delete(chatbotName: str):
|
| 246 |
username, chatbotName = chatbotName.split("-")[1], chatbotName.split("-")[2]
|
| 247 |
-
client.table('ConversAI_ChatbotInfo').delete().eq('
|
| 248 |
return deleteTable(tableName=chatbotName)
|
| 249 |
|
| 250 |
|
|
@@ -265,7 +275,7 @@ async def getCount(vectorstore: str):
|
|
| 265 |
username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 266 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 267 |
return {
|
| 268 |
-
"currentCount": df[(df['
|
| 269 |
}
|
| 270 |
|
| 271 |
|
|
@@ -294,4 +304,4 @@ async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
|
|
| 294 |
except:
|
| 295 |
return {
|
| 296 |
"output": "UNABLE TO ANSWER QUERY"
|
| 297 |
-
}
|
|
|
|
| 20 |
allow_headers=["*"],
|
| 21 |
)
|
| 22 |
|
| 23 |
+
# app.include_router(speech_translator_router, prefix="/speech")
|
| 24 |
|
| 25 |
|
| 26 |
@app.post("/signup")
|
| 27 |
+
async def sign_up(email, username, password):
|
| 28 |
try:
|
| 29 |
res, _ = supabase.auth.sign_up(
|
| 30 |
{"email": email, "password": password, "role": "user"}
|
| 31 |
)
|
| 32 |
+
createUser(username=username)
|
| 33 |
response = {
|
| 34 |
"status": "success",
|
| 35 |
"code": 200,
|
|
|
|
| 57 |
user_id = res.user.id
|
| 58 |
access_token = res.session.access_token
|
| 59 |
refresh_token = res.session.refresh_token
|
| 60 |
+
createUser(username=user_id)
|
| 61 |
+
|
| 62 |
store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
|
| 63 |
try:
|
| 64 |
store_id = store_session_check[1][0]["StoreID"]
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
@app.post("/logout")
async def sign_out(store_id):
    """Log the current user out.

    Deletes the row in the "Stores" table whose "StoreID" equals ``store_id``,
    then signs out of Supabase auth.

    Returns:
        {"message": "success"} when both steps complete.

    Raises:
        HTTPException: status 400 carrying the underlying error text if
            either step fails.
    """
    try:
        supabase.table("Stores").delete().eq("StoreID", store_id).execute()
        # The return value of sign_out() was never used, so it is not bound.
        supabase.auth.sign_out()
    except Exception as e:
        # Chain the cause so the original traceback stays visible in logs.
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"message": "success"}
|
| 130 |
|
| 131 |
|
| 132 |
@app.post("/oauth")
|
|
|
|
| 139 |
@app.post("/newChatbot")
async def newChatbot(chatbotName: str, username: str):
    """Create a new chatbot for a user, subject to the user's chatbot limit.

    Counts the user's existing collections via ``listTables``, compares the
    count to "chatbotLimit" in ConversAI_UserConfig, records the chatbot in
    ConversAI_ChatbotInfo, and creates the collection named
    ``convai-<username>-<chatbotName>``.

    Returns:
        {"output": "CHATBOT LIMIT EXCEEDED"} when the limit is reached,
        otherwise whatever ``createTable`` returns.

    Raises:
        HTTPException: 500 if the existing chatbots cannot be listed,
            404 if the user has no ConversAI_UserConfig row.
    """
    tables = listTables(username=username)
    if "output" not in tables:
        # listTables reports failures as {"error": ...}; indexing "output"
        # on that shape used to surface as an opaque KeyError.
        raise HTTPException(
            status_code=500,
            detail=str(tables.get("error", "UNABLE TO LIST CHATBOTS")),
        )
    currentBotCount = len(tables["output"])

    configRows = (
        client.table("ConversAI_UserConfig")
        .select("chatbotLimit")
        .eq("user_id", username)
        .execute()
        .data
    )
    if not configRows:
        # No config row means no limit to compare against.
        raise HTTPException(status_code=404, detail="USER CONFIG NOT FOUND")
    limit = configRows[0]["chatbotLimit"]

    if currentBotCount >= int(limit):
        return {
            "output": "CHATBOT LIMIT EXCEEDED"
        }

    # NOTE(review): the info row is written before the collection exists; if
    # createTable fails the row is left behind. Confirm whether that matters.
    client.table("ConversAI_ChatbotInfo").insert({"user_id": username, "chatbotname": chatbotName}).execute()
    collectionName = f"convai-{username}-{chatbotName}"
    return createTable(tablename=collectionName)
|
| 151 |
|
|
|
|
| 159 |
text += page.extract_text()
|
| 160 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 161 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 162 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
| 163 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
| 164 |
"tokenLimit"]
|
| 165 |
newCount = currentCount + len(text)
|
| 166 |
if newCount < int(limit):
|
| 167 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
| 168 |
"chatbotname", chatbotname).execute()
|
| 169 |
return addDocuments(text=text, vectorstore=vectorstore)
|
| 170 |
else:
|
|
|
|
| 184 |
async def addText(vectorstore: str, text: str):
|
| 185 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 186 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 187 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
| 188 |
newCount = currentCount + len(text)
|
| 189 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
| 190 |
"tokenLimit"]
|
| 191 |
if newCount < int(limit):
|
| 192 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
| 193 |
"chatbotname", chatbotname).execute()
|
| 194 |
return addDocuments(text=text, vectorstore=vectorstore)
|
| 195 |
else:
|
|
|
|
| 208 |
async def addText(addQaPair: AddQAPair):
|
| 209 |
username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
|
| 210 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 211 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
| 212 |
qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
|
| 213 |
newCount = currentCount + len(qa)
|
| 214 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
| 215 |
"tokenLimit"]
|
| 216 |
if newCount < int(limit):
|
| 217 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
| 218 |
"chatbotname", chatbotname).execute()
|
| 219 |
return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
|
| 220 |
else:
|
|
|
|
| 232 |
[f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
|
| 233 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 234 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 235 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
| 236 |
newCount = currentCount + len(text)
|
| 237 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
| 238 |
"tokenLimit"]
|
| 239 |
if newCount < int(limit):
|
| 240 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
| 241 |
"chatbotname", chatbotname).execute()
|
| 242 |
return addDocuments(text=text, vectorstore=vectorstore)
|
| 243 |
else:
|
|
|
|
| 254 |
@app.post("/deleteChatbot")
async def delete(chatbotName: str):
    """Delete a chatbot's ConversAI_ChatbotInfo row and its vector collection.

    ``chatbotName`` is the full collection name in the
    ``convai-<user_id>-<chatbot>`` form that ``newChatbot`` builds.

    Returns:
        Whatever ``deleteTable`` returns.

    Raises:
        HTTPException: 400 if ``chatbotName`` does not have three
            hyphen-separated parts.
    """
    collectionName = chatbotName
    # maxsplit=2 keeps hyphens inside the chatbot's own name intact.
    parts = collectionName.split("-", 2)
    if len(parts) < 3:
        raise HTTPException(status_code=400, detail="INVALID CHATBOT NAME")
    username, botName = parts[1], parts[2]
    # NOTE(review): this parsing assumes user ids contain no hyphens. sign_in
    # passes res.user.id to createUser; if those ids are UUIDs, confirm how
    # collection names should be parsed.
    client.table('ConversAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname', botName).execute()
    # The collection was created under the full name, so pass the full name.
    # Previously the short bot name was passed here.
    # NOTE(review): deleteTable's body is not visible here; it takes only this
    # argument, so it is assumed to expect the created collection name.
    return deleteTable(tableName=collectionName)
|
| 259 |
|
| 260 |
|
|
|
|
| 275 |
username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
| 276 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 277 |
return {
|
| 278 |
+
"currentCount": df[(df['user_id'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
|
| 279 |
}
|
| 280 |
|
| 281 |
|
|
|
|
| 304 |
except:
|
| 305 |
return {
|
| 306 |
"output": "UNABLE TO ANSWER QUERY"
|
| 307 |
+
}
|
functions.py
CHANGED
|
@@ -32,19 +32,18 @@ import base64
|
|
| 32 |
import time
|
| 33 |
import requests
|
| 34 |
|
| 35 |
-
|
| 36 |
load_dotenv("secrets.env")
|
| 37 |
client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
|
| 38 |
qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
|
| 39 |
model_kwargs = {"device": "cuda"}
|
| 40 |
encode_kwargs = {"normalize_embeddings": True}
|
| 41 |
vectorEmbeddings = HuggingFaceEmbeddings(
|
| 42 |
-
model_name
|
| 43 |
-
model_kwargs
|
| 44 |
-
encode_kwargs
|
| 45 |
)
|
| 46 |
-
reader = easyocr.Reader(['en'], gpu
|
| 47 |
-
sparseEmbeddings = FastEmbedSparse(model
|
| 48 |
prompt = """
|
| 49 |
INSTRUCTIONS:
|
| 50 |
=====================================
|
|
@@ -81,46 +80,48 @@ store = InMemoryStore()
|
|
| 81 |
chatHistoryStore = dict()
|
| 82 |
|
| 83 |
|
| 84 |
-
def createUser(username: str
|
| 85 |
try:
|
| 86 |
userData = client.table("ConversAI_UserInfo").select("*").execute().data
|
| 87 |
-
if username not in [userData[x]["
|
| 88 |
-
client.table("ConversAI_UserInfo").insert({"
|
| 89 |
-
client.table("ConversAI_UserConfig").insert({"
|
| 90 |
return {
|
| 91 |
"output": "SUCCESS"
|
| 92 |
}
|
| 93 |
-
else:
|
| 94 |
return {
|
| 95 |
"output": "USER ALREADY EXISTS"
|
| 96 |
}
|
| 97 |
except Exception as e:
|
| 98 |
return {
|
| 99 |
"error": e
|
| 100 |
-
}
|
| 101 |
|
| 102 |
|
| 103 |
-
def matchPassword(username: str, password: str) -> str:
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
def createTable(tablename: str):
|
| 119 |
global vectorEmbeddings
|
| 120 |
global sparseEmbeddings
|
| 121 |
qdrant = QdrantVectorStore.from_documents(
|
| 122 |
-
documents
|
| 123 |
-
embedding
|
| 124 |
sparse_embedding=sparseEmbeddings,
|
| 125 |
url=os.environ["QDRANT_URL"],
|
| 126 |
prefer_grpc=True,
|
|
@@ -132,21 +133,22 @@ def createTable(tablename: str):
|
|
| 132 |
"output": "SUCCESS"
|
| 133 |
}
|
| 134 |
|
|
|
|
| 135 |
def addDocuments(text: str, vectorstore: str):
|
| 136 |
global vectorEmbeddings
|
| 137 |
global sparseEmbeddings
|
| 138 |
global store
|
| 139 |
parentSplitter = RecursiveCharacterTextSplitter(
|
| 140 |
-
chunk_size
|
| 141 |
-
add_start_index
|
| 142 |
)
|
| 143 |
childSplitter = RecursiveCharacterTextSplitter(
|
| 144 |
-
chunk_size
|
| 145 |
-
add_start_index
|
| 146 |
)
|
| 147 |
-
texts = [Document(page_content
|
| 148 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
| 149 |
-
embedding
|
| 150 |
sparse_embedding=sparseEmbeddings,
|
| 151 |
collection_name=vectorstore,
|
| 152 |
url=os.environ["QDRANT_URL"],
|
|
@@ -159,7 +161,7 @@ def addDocuments(text: str, vectorstore: str):
|
|
| 159 |
child_splitter=childSplitter,
|
| 160 |
parent_splitter=parentSplitter
|
| 161 |
)
|
| 162 |
-
retriever.add_documents(documents
|
| 163 |
return {
|
| 164 |
"output": "SUCCESS"
|
| 165 |
}
|
|
@@ -169,7 +171,8 @@ def format_docs(docs: str):
|
|
| 169 |
context = "\n\n".join(doc.page_content for doc in docs)
|
| 170 |
if context == "":
|
| 171 |
context = "No context found"
|
| 172 |
-
else:
|
|
|
|
| 173 |
return context
|
| 174 |
|
| 175 |
|
|
@@ -186,19 +189,19 @@ def trimMessages(chain_input):
|
|
| 186 |
pass
|
| 187 |
else:
|
| 188 |
chatHistoryStore[storeName].clear()
|
| 189 |
-
for message in messages[-1:
|
| 190 |
chatHistoryStore[storeName].add_message(message)
|
| 191 |
return True
|
| 192 |
|
| 193 |
|
| 194 |
def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
|
| 195 |
-
global prompt
|
| 196 |
global client
|
| 197 |
global vectorEmbeddings
|
| 198 |
global sparseEmbeddings
|
| 199 |
vectorStoreName = vectorstore
|
| 200 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
| 201 |
-
embedding
|
| 202 |
sparse_embedding=sparseEmbeddings,
|
| 203 |
collection_name=vectorstore,
|
| 204 |
url=os.environ["QDRANT_URL"],
|
|
@@ -216,25 +219,25 @@ def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192")
|
|
| 216 |
base_compressor=compressor, base_retriever=retriever
|
| 217 |
)
|
| 218 |
baseChain = (
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
| 224 |
messageChain = RunnableWithMessageHistory(
|
| 225 |
baseChain,
|
| 226 |
get_session_history,
|
| 227 |
-
input_messages_key
|
| 228 |
-
history_messages_key
|
| 229 |
)
|
| 230 |
-
chain = RunnablePassthrough.assign(messages_trimmed
|
| 231 |
return {
|
| 232 |
"output": chain.invoke(
|
| 233 |
{"question": query},
|
| 234 |
{"configurable": {"session_id": vectorStoreName}}
|
| 235 |
)
|
| 236 |
}
|
| 237 |
-
|
| 238 |
|
| 239 |
|
| 240 |
def deleteTable(tableName: str):
|
|
@@ -249,21 +252,24 @@ def deleteTable(tableName: str):
|
|
| 249 |
"error": e
|
| 250 |
}
|
| 251 |
|
|
|
|
| 252 |
def listTables(username: str):
|
| 253 |
try:
|
| 254 |
global qdrantClient
|
| 255 |
qdrantCollections = qdrantClient.get_collections()
|
| 256 |
return {
|
| 257 |
-
"output": list(filter(lambda x: True if x.split("-")[1] == username else False,
|
|
|
|
| 258 |
}
|
| 259 |
except Exception as e:
|
| 260 |
return {
|
| 261 |
"error": e
|
| 262 |
}
|
| 263 |
-
|
| 264 |
|
| 265 |
-
|
|
|
|
| 266 |
start = time.time()
|
|
|
|
| 267 |
def getLinksFromPage(url: str) -> list:
|
| 268 |
response = requests.get(url)
|
| 269 |
soup = BeautifulSoup(response.content, "lxml")
|
|
@@ -281,6 +287,7 @@ def getLinks(url: str, timeout = 30):
|
|
| 281 |
else:
|
| 282 |
continue
|
| 283 |
return links
|
|
|
|
| 284 |
links = getLinksFromPage(url)
|
| 285 |
uniqueLinks = set()
|
| 286 |
for link in links:
|
|
@@ -292,22 +299,23 @@ def getLinks(url: str, timeout = 30):
|
|
| 292 |
return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
| 293 |
|
| 294 |
|
| 295 |
-
|
| 296 |
def getTextFromImagePDF(pdfBytes):
    """Extract text from a PDF by running OCR on each rendered page.

    The PDF bytes are converted to images with ``convert_from_bytes``. Each
    image is passed to the module-level ``reader`` with ``paragraph=True``.
    Element 1 of every result is joined with newlines to form one page's
    text, and pages are joined with three newlines.
    """
    pageTexts = []
    for pageImage in convert_from_bytes(pdfBytes):
        detections = reader.readtext(np.array(pageImage), paragraph=True)
        pageTexts.append("\n".join([item[1] for item in detections]))
    return "\n\n\n".join(pageTexts)
|
| 303 |
|
|
|
|
| 304 |
def getTranscript(urls: str):
|
| 305 |
urls = urls.split(",")
|
| 306 |
texts = []
|
| 307 |
for url in urls:
|
| 308 |
try:
|
| 309 |
loader = YoutubeLoader.from_youtube_url(
|
| 310 |
-
url, add_video_info
|
| 311 |
)
|
| 312 |
doc = " ".join([x.page_content for x in loader.load()])
|
| 313 |
texts.append(doc)
|
|
@@ -318,12 +326,12 @@ def getTranscript(urls: str):
|
|
| 318 |
|
| 319 |
|
| 320 |
def analyzeData(query, dataframe):
|
| 321 |
-
llm = ChatGroq(name
|
| 322 |
-
df = SmartDataframe(dataframe, config
|
| 323 |
response = df.chat(query)
|
| 324 |
if os.path.isfile(response):
|
| 325 |
with open(response, "rb") as file:
|
| 326 |
b64string = base64.b64encode(file.read()).decode("utf-8")
|
| 327 |
return f"data:image/png;base64,{b64string}"
|
| 328 |
else:
|
| 329 |
-
return response
|
|
|
|
| 32 |
import time
|
| 33 |
import requests
|
| 34 |
|
|
|
|
| 35 |
load_dotenv("secrets.env")
|
| 36 |
client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
|
| 37 |
qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
|
| 38 |
model_kwargs = {"device": "cuda"}
|
| 39 |
encode_kwargs = {"normalize_embeddings": True}
|
| 40 |
vectorEmbeddings = HuggingFaceEmbeddings(
|
| 41 |
+
model_name="BAAI/bge-m3",
|
| 42 |
+
model_kwargs=model_kwargs,
|
| 43 |
+
encode_kwargs=encode_kwargs
|
| 44 |
)
|
| 45 |
+
reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
|
| 46 |
+
sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25")
|
| 47 |
prompt = """
|
| 48 |
INSTRUCTIONS:
|
| 49 |
=====================================
|
|
|
|
| 80 |
chatHistoryStore = dict()
|
| 81 |
|
| 82 |
|
| 83 |
+
def createUser(username: str) -> dict:
    """Register a user in ConversAI_UserInfo and ConversAI_UserConfig if absent.

    Args:
        username: value stored in the "user_id" column of both tables.

    Returns:
        {"output": "SUCCESS"} after inserting both rows,
        {"output": "USER ALREADY EXISTS"} when ConversAI_UserInfo already has
        a row with this user_id, or {"error": <message>} if any call fails.
    """
    try:
        # Ask the database for this one user instead of downloading every
        # row and scanning it in Python.
        existing = (
            client.table("ConversAI_UserInfo")
            .select("user_id")
            .eq("user_id", username)
            .execute()
            .data
        )
        if existing:
            return {
                "output": "USER ALREADY EXISTS"
            }
        client.table("ConversAI_UserInfo").insert({"user_id": username}).execute()
        client.table("ConversAI_UserConfig").insert({"user_id": username}).execute()
        return {
            "output": "SUCCESS"
        }
    except Exception as e:
        # Return the message rather than the exception object so the result
        # can be serialised if it is ever sent in a response.
        return {
            "error": str(e)
        }
|
| 100 |
|
| 101 |
|
| 102 |
+
# def matchPassword(username: str, password: str) -> str:
|
| 103 |
+
# response = (
|
| 104 |
+
# client.table("ConversAI_UserInfo")
|
| 105 |
+
# .select("*")
|
| 106 |
+
# .eq("username", username)
|
| 107 |
+
# .execute()
|
| 108 |
+
# )
|
| 109 |
+
# try:
|
| 110 |
+
# return {
|
| 111 |
+
# "output": password == response.data[0]["password"]
|
| 112 |
+
# }
|
| 113 |
+
# except:
|
| 114 |
+
# return {
|
| 115 |
+
# "output": "USER DOESN'T EXIST"
|
| 116 |
+
# }
|
| 117 |
|
| 118 |
|
| 119 |
def createTable(tablename: str):
|
| 120 |
global vectorEmbeddings
|
| 121 |
global sparseEmbeddings
|
| 122 |
qdrant = QdrantVectorStore.from_documents(
|
| 123 |
+
documents=[],
|
| 124 |
+
embedding=vectorEmbeddings,
|
| 125 |
sparse_embedding=sparseEmbeddings,
|
| 126 |
url=os.environ["QDRANT_URL"],
|
| 127 |
prefer_grpc=True,
|
|
|
|
| 133 |
"output": "SUCCESS"
|
| 134 |
}
|
| 135 |
|
| 136 |
+
|
| 137 |
def addDocuments(text: str, vectorstore: str):
|
| 138 |
global vectorEmbeddings
|
| 139 |
global sparseEmbeddings
|
| 140 |
global store
|
| 141 |
parentSplitter = RecursiveCharacterTextSplitter(
|
| 142 |
+
chunk_size=2100,
|
| 143 |
+
add_start_index=True
|
| 144 |
)
|
| 145 |
childSplitter = RecursiveCharacterTextSplitter(
|
| 146 |
+
chunk_size=300,
|
| 147 |
+
add_start_index=True
|
| 148 |
)
|
| 149 |
+
texts = [Document(page_content=text)]
|
| 150 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
| 151 |
+
embedding=vectorEmbeddings,
|
| 152 |
sparse_embedding=sparseEmbeddings,
|
| 153 |
collection_name=vectorstore,
|
| 154 |
url=os.environ["QDRANT_URL"],
|
|
|
|
| 161 |
child_splitter=childSplitter,
|
| 162 |
parent_splitter=parentSplitter
|
| 163 |
)
|
| 164 |
+
retriever.add_documents(documents=texts)
|
| 165 |
return {
|
| 166 |
"output": "SUCCESS"
|
| 167 |
}
|
|
|
|
| 171 |
context = "\n\n".join(doc.page_content for doc in docs)
|
| 172 |
if context == "":
|
| 173 |
context = "No context found"
|
| 174 |
+
else:
|
| 175 |
+
pass
|
| 176 |
return context
|
| 177 |
|
| 178 |
|
|
|
|
| 189 |
pass
|
| 190 |
else:
|
| 191 |
chatHistoryStore[storeName].clear()
|
| 192 |
+
for message in messages[-1:]:
|
| 193 |
chatHistoryStore[storeName].add_message(message)
|
| 194 |
return True
|
| 195 |
|
| 196 |
|
| 197 |
def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
|
| 198 |
+
global prompt
|
| 199 |
global client
|
| 200 |
global vectorEmbeddings
|
| 201 |
global sparseEmbeddings
|
| 202 |
vectorStoreName = vectorstore
|
| 203 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
| 204 |
+
embedding=vectorEmbeddings,
|
| 205 |
sparse_embedding=sparseEmbeddings,
|
| 206 |
collection_name=vectorstore,
|
| 207 |
url=os.environ["QDRANT_URL"],
|
|
|
|
| 219 |
base_compressor=compressor, base_retriever=retriever
|
| 220 |
)
|
| 221 |
baseChain = (
|
| 222 |
+
{"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs),
|
| 223 |
+
"question": RunnablePassthrough(), "chatHistory": RunnablePassthrough()}
|
| 224 |
+
| prompt
|
| 225 |
+
| ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
|
| 226 |
+
| StrOutputParser()
|
| 227 |
+
)
|
| 228 |
messageChain = RunnableWithMessageHistory(
|
| 229 |
baseChain,
|
| 230 |
get_session_history,
|
| 231 |
+
input_messages_key="question",
|
| 232 |
+
history_messages_key="chatHistory"
|
| 233 |
)
|
| 234 |
+
chain = RunnablePassthrough.assign(messages_trimmed=trimMessages) | messageChain
|
| 235 |
return {
|
| 236 |
"output": chain.invoke(
|
| 237 |
{"question": query},
|
| 238 |
{"configurable": {"session_id": vectorStoreName}}
|
| 239 |
)
|
| 240 |
}
|
|
|
|
| 241 |
|
| 242 |
|
| 243 |
def deleteTable(tableName: str):
|
|
|
|
| 252 |
"error": e
|
| 253 |
}
|
| 254 |
|
| 255 |
+
|
| 256 |
def listTables(username: str):
    """List the Qdrant collections whose name belongs to ``username``.

    A collection matches when the second hyphen-separated part of its name
    equals ``username``.

    Returns:
        {"output": [collection names]} on success, or {"error": <message>}
        if the Qdrant call fails.
    """
    try:
        collections = qdrantClient.get_collections().collections
        owned = []
        for collection in collections:
            parts = collection.name.split("-")
            # A name without a hyphen has no owner segment. Skip it instead
            # of letting an IndexError fail the listing for every user.
            if len(parts) > 1 and parts[1] == username:
                owned.append(collection.name)
        return {
            "output": owned
        }
    except Exception as e:
        # Return the message rather than the exception object so the result
        # can be serialised if it is ever sent in a response.
        return {
            "error": str(e)
        }
|
|
|
|
| 268 |
|
| 269 |
+
|
| 270 |
+
def getLinks(url: str, timeout=30):
|
| 271 |
start = time.time()
|
| 272 |
+
|
| 273 |
def getLinksFromPage(url: str) -> list:
|
| 274 |
response = requests.get(url)
|
| 275 |
soup = BeautifulSoup(response.content, "lxml")
|
|
|
|
| 287 |
else:
|
| 288 |
continue
|
| 289 |
return links
|
| 290 |
+
|
| 291 |
links = getLinksFromPage(url)
|
| 292 |
uniqueLinks = set()
|
| 293 |
for link in links:
|
|
|
|
| 299 |
return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
| 300 |
|
| 301 |
|
|
|
|
| 302 |
def getTextFromImagePDF(pdfBytes):
    """Extract text from a PDF by running OCR on each rendered page.

    The PDF bytes are converted to images with ``convert_from_bytes``. Each
    image is passed to the module-level ``reader`` with ``paragraph=True``.
    Element 1 of every result is joined with newlines to form one page's
    text, and pages are joined with three newlines.
    """
    pageTexts = []
    for pageImage in convert_from_bytes(pdfBytes):
        detections = reader.readtext(np.array(pageImage), paragraph=True)
        pageTexts.append("\n".join([item[1] for item in detections]))
    return "\n\n\n".join(pageTexts)
|
| 310 |
|
| 311 |
+
|
| 312 |
def getTranscript(urls: str):
|
| 313 |
urls = urls.split(",")
|
| 314 |
texts = []
|
| 315 |
for url in urls:
|
| 316 |
try:
|
| 317 |
loader = YoutubeLoader.from_youtube_url(
|
| 318 |
+
url, add_video_info=False
|
| 319 |
)
|
| 320 |
doc = " ".join([x.page_content for x in loader.load()])
|
| 321 |
texts.append(doc)
|
|
|
|
| 326 |
|
| 327 |
|
| 328 |
def analyzeData(query, dataframe):
    """Answer ``query`` about ``dataframe`` through a SmartDataframe chat.

    Returns:
        A ``data:image/png;base64,...`` URI when the chat response is a path
        to an existing file, otherwise the response unchanged.
    """
    # `model=` selects the Groq model, as in the ChatGroq call in answerQuery.
    # The previous `name=` keyword did not choose the model.
    llm = ChatGroq(model="llama-3.1-8b-instant")
    df = SmartDataframe(dataframe, config={"llm": llm, "verbose": False})
    response = df.chat(query)
    # Only a string can be a file path; os.path.isfile raises TypeError on
    # other objects, so check the type first.
    if isinstance(response, str) and os.path.isfile(response):
        with open(response, "rb") as file:
            b64string = base64.b64encode(file.read()).decode("utf-8")
        return f"data:image/png;base64,{b64string}"
    return response
|