Spaces:
Sleeping
Sleeping
DEBUG: base64 -> plain text
Browse files- app.py +17 -10
- functions.py +18 -0
app.py
CHANGED
|
@@ -2,9 +2,8 @@ import io
|
|
| 2 |
import tempfile
|
| 3 |
from ipaddress import ip_address
|
| 4 |
from typing import Optional
|
| 5 |
-
import nltk
|
| 6 |
-
import jwt
|
| 7 |
import base64
|
|
|
|
| 8 |
import json
|
| 9 |
from click import option
|
| 10 |
from jwt import ExpiredSignatureError, InvalidTokenError
|
|
@@ -265,7 +264,7 @@ async def loadPDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
| 265 |
"output": text,
|
| 266 |
"source": source
|
| 267 |
}
|
| 268 |
-
numTokens = len(" ".join([
|
| 269 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 270 |
fileName = createDataSourceName(sourceName=source)
|
| 271 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
@@ -297,7 +296,7 @@ async def loadImagePDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
| 297 |
}
|
| 298 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 299 |
fileName = createDataSourceName(sourceName=source)
|
| 300 |
-
numTokens = len(" ".join([
|
| 301 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 302 |
response = (
|
| 303 |
supabase.table("ConversAI_ChatbotDataSources")
|
|
@@ -321,8 +320,8 @@ class AddText(BaseModel):
|
|
| 321 |
|
| 322 |
@app.post("/loadText")
|
| 323 |
async def loadText(addTextConfig: AddText):
|
| 324 |
-
trackUsage(vectorstore=vectorstore, endpoint="/loadText")
|
| 325 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
|
|
|
| 326 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
| 327 |
text = cleanText(text = text)
|
| 328 |
dct = {
|
|
@@ -389,7 +388,7 @@ async def loadWebURLs(loadWebsite: LoadWebsite):
|
|
| 389 |
"output": text,
|
| 390 |
"source": source
|
| 391 |
}
|
| 392 |
-
numTokens = len(" ".join([
|
| 393 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 394 |
fileName = createDataSourceName(sourceName=source)
|
| 395 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
@@ -472,7 +471,7 @@ async def loadYoutubeTranscript(ytTranscript: YtTranscript):
|
|
| 472 |
"output": text,
|
| 473 |
"source": "www.youtube.com"
|
| 474 |
}
|
| 475 |
-
numTokens = len(" ".join([
|
| 476 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 477 |
fileName = createDataSourceName(sourceName="youtube")
|
| 478 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
@@ -531,6 +530,13 @@ async def listChatbotSources(vectorstore: str):
|
|
| 531 |
return result
|
| 532 |
|
| 533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
@app.post("/deleteChatbotSource")
|
| 535 |
async def deleteChatbotSource(vectorstore: str, dataSourceName: str):
|
| 536 |
trackUsage(vectorstore=vectorstore, endpoint="/deleteChatbotSource")
|
|
@@ -552,7 +558,8 @@ class LoadEditedJson(BaseModel):
|
|
| 552 |
async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
|
| 553 |
username, chatbotName = loadEditedJsonConfig.vectorstore.split("$")[1], loadEditedJsonConfig.vectorstore.split("$")[2]
|
| 554 |
trackUsage(vectorstore=loadEditedJsonConfig.vectorstore, endpoint="/loadEditedJson")
|
| 555 |
-
jsonData =
|
|
|
|
| 556 |
fileName = createDataSourceName(loadEditedJsonConfig.dataSourceName)
|
| 557 |
response = supabase.storage.from_("ConversAI").upload(file=jsonData, path=f"{fileName}_data.json")
|
| 558 |
response = (
|
|
@@ -608,7 +615,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
| 608 |
content = file["output"]
|
| 609 |
fileSource = file["source"]
|
| 610 |
texts.append(".".join(
|
| 611 |
-
[
|
| 612 |
"\n", " "))
|
| 613 |
sources.append(fileSource)
|
| 614 |
elif fileType == "/loadText":
|
|
@@ -624,7 +631,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
| 624 |
content = file["output"]
|
| 625 |
fileSource = file["source"]
|
| 626 |
texts.append(".".join(
|
| 627 |
-
[
|
| 628 |
"\n", " "))
|
| 629 |
sources.append(fileSource)
|
| 630 |
else:
|
|
|
|
| 2 |
import tempfile
|
| 3 |
from ipaddress import ip_address
|
| 4 |
from typing import Optional
|
|
|
|
|
|
|
| 5 |
import base64
|
| 6 |
+
import jwt
|
| 7 |
import json
|
| 8 |
from click import option
|
| 9 |
from jwt import ExpiredSignatureError, InvalidTokenError
|
|
|
|
| 264 |
"output": text,
|
| 265 |
"source": source
|
| 266 |
}
|
| 267 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 268 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 269 |
fileName = createDataSourceName(sourceName=source)
|
| 270 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
|
| 296 |
}
|
| 297 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 298 |
fileName = createDataSourceName(sourceName=source)
|
| 299 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 300 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 301 |
response = (
|
| 302 |
supabase.table("ConversAI_ChatbotDataSources")
|
|
|
|
| 320 |
|
| 321 |
@app.post("/loadText")
|
| 322 |
async def loadText(addTextConfig: AddText):
|
|
|
|
| 323 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
| 324 |
+
trackUsage(vectorstore=vectorstore, endpoint="/loadText")
|
| 325 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
| 326 |
text = cleanText(text = text)
|
| 327 |
dct = {
|
|
|
|
| 388 |
"output": text,
|
| 389 |
"source": source
|
| 390 |
}
|
| 391 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 392 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 393 |
fileName = createDataSourceName(sourceName=source)
|
| 394 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
|
| 471 |
"output": text,
|
| 472 |
"source": "www.youtube.com"
|
| 473 |
}
|
| 474 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 475 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 476 |
fileName = createDataSourceName(sourceName="youtube")
|
| 477 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
|
| 530 |
return result
|
| 531 |
|
| 532 |
|
| 533 |
+
@app.post("/getDataSource")
async def getDataSource(vectorstore: str, sourceUrl: str):
    """Fetch a data-source document by URL and return it with string values base64-encoded.

    Args:
        vectorstore: Vectorstore identifier; passed to ``trackUsage`` for
            usage accounting.
        sourceUrl: URL the document is downloaded from.

    Returns:
        The parsed document after ``encodeToBase64`` has been applied to it.

    Raises:
        requests.RequestException: If the download fails or times out.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
    # NOTE(review): sourceUrl is caller-supplied and fetched server-side;
    # confirm it is restricted to the expected storage host (SSRF risk).
    # NOTE(review): requests.get is a blocking call inside an async handler.
    # The timeout keeps an unresponsive host from stalling the request forever.
    r = requests.get(sourceUrl, timeout=30)
    # Parse the body as JSON rather than eval(): eval() would execute
    # arbitrary code served by the remote host, and it also fails on the JSON
    # literals null/true/false. Presumably the body is a "*_data.json"
    # document produced with json.dumps — confirm against the upload path.
    return encodeToBase64(json.loads(r.content.decode("utf-8")))
|
| 538 |
+
|
| 539 |
+
|
| 540 |
@app.post("/deleteChatbotSource")
|
| 541 |
async def deleteChatbotSource(vectorstore: str, dataSourceName: str):
|
| 542 |
trackUsage(vectorstore=vectorstore, endpoint="/deleteChatbotSource")
|
|
|
|
| 558 |
async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
|
| 559 |
username, chatbotName = loadEditedJsonConfig.vectorstore.split("$")[1], loadEditedJsonConfig.vectorstore.split("$")[2]
|
| 560 |
trackUsage(vectorstore=loadEditedJsonConfig.vectorstore, endpoint="/loadEditedJson")
|
| 561 |
+
jsonData = decodeBase64(loadEditedJsonConfig.jsonData)
|
| 562 |
+
jsonData = json.dumps(jsonData, indent = 1).encode("utf-8")
|
| 563 |
fileName = createDataSourceName(loadEditedJsonConfig.dataSourceName)
|
| 564 |
response = supabase.storage.from_("ConversAI").upload(file=jsonData, path=f"{fileName}_data.json")
|
| 565 |
response = (
|
|
|
|
| 615 |
content = file["output"]
|
| 616 |
fileSource = file["source"]
|
| 617 |
texts.append(".".join(
|
| 618 |
+
[content[key] for key in content.keys()]).replace(
|
| 619 |
"\n", " "))
|
| 620 |
sources.append(fileSource)
|
| 621 |
elif fileType == "/loadText":
|
|
|
|
| 631 |
content = file["output"]
|
| 632 |
fileSource = file["source"]
|
| 633 |
texts.append(".".join(
|
| 634 |
+
[content[key] for key in content.keys()]).replace(
|
| 635 |
"\n", " "))
|
| 636 |
sources.append(fileSource)
|
| 637 |
else:
|
functions.py
CHANGED
|
@@ -356,6 +356,24 @@ def extractTextFromUrlList(urls):
|
|
| 356 |
return {x: y for x, y in zip(urls, texts)}
|
| 357 |
|
| 358 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
def createDataSourceName(sourceName):
|
| 360 |
sources = [x["dataSourceName"] for x in client.table("ConversAI_ChatbotDataSources").select("dataSourceName").execute().data]
|
| 361 |
if sourceName not in sources:
|
|
|
|
| 356 |
return {x: y for x, y in zip(urls, texts)}
|
| 357 |
|
| 358 |
|
| 359 |
+
def encodeToBase64(dct: dict):
    """Base64-encode every string value of ``dct``, recursing into nested dicts.

    Each ``str`` value is UTF-8 encoded, base64-encoded, and stored back as a
    ``str``. Nested ``dict`` values are processed the same way. Values of any
    other type (numbers, lists, ``None``, ...) are left untouched, and keys
    are never encoded.

    Args:
        dct: Dictionary to encode. It is modified in place.

    Returns:
        The same dictionary object that was passed in.
    """
    for key, value in dct.items():
        # isinstance (instead of an exact type() comparison) also covers
        # subclasses of str and dict, which would otherwise be skipped.
        if isinstance(value, str):
            dct[key] = base64.b64encode(value.encode("utf-8")).decode("utf-8")
        elif isinstance(value, dict):
            dct[key] = encodeToBase64(value)
    return dct
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def decodeBase64(dct: dict):
    """Decode every base64 string value of ``dct``, recursing into nested dicts.

    Each ``str`` value is base64-decoded and the resulting bytes are
    interpreted as UTF-8 text. Nested ``dict`` values are processed the same
    way. Values of any other type (numbers, lists, ``None``, ...) are left
    untouched, and keys are never decoded.

    Args:
        dct: Dictionary to decode. It is modified in place.

    Returns:
        The same dictionary object that was passed in.

    Raises:
        binascii.Error: If a string value is not valid base64.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    for key, value in dct.items():
        # isinstance (instead of an exact type() comparison) also covers
        # subclasses of str and dict, which would otherwise be skipped.
        if isinstance(value, str):
            dct[key] = base64.b64decode(value.encode("utf-8")).decode("utf-8")
        elif isinstance(value, dict):
            dct[key] = decodeBase64(value)
    return dct
|
| 375 |
+
|
| 376 |
+
|
| 377 |
def createDataSourceName(sourceName):
|
| 378 |
sources = [x["dataSourceName"] for x in client.table("ConversAI_ChatbotDataSources").select("dataSourceName").execute().data]
|
| 379 |
if sourceName not in sources:
|