Spaces:
Sleeping
Sleeping
DEBUG: WebURLs
Browse files
- app.py +8 -8
- functions.py +4 -4
app.py
CHANGED
|
@@ -294,7 +294,7 @@ async def loadImagePDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
| 294 |
"output": text,
|
| 295 |
"source": source
|
| 296 |
}
|
| 297 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 298 |
fileName = createDataSourceName(sourceName=source)
|
| 299 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 300 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
@@ -329,7 +329,7 @@ async def loadText(addTextConfig: AddText):
|
|
| 329 |
"source": "Text"
|
| 330 |
}
|
| 331 |
numTokens = len(text.translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 332 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 333 |
fileName = createDataSourceName(sourceName="Text")
|
| 334 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 335 |
response = (
|
|
@@ -389,7 +389,7 @@ async def loadWebURLs(loadWebsite: LoadWebsite):
|
|
| 389 |
"source": source
|
| 390 |
}
|
| 391 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 392 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 393 |
fileName = createDataSourceName(sourceName=source)
|
| 394 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 395 |
response = (
|
|
@@ -472,7 +472,7 @@ async def loadYoutubeTranscript(ytTranscript: YtTranscript):
|
|
| 472 |
"source": "www.youtube.com"
|
| 473 |
}
|
| 474 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 475 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 476 |
fileName = createDataSourceName(sourceName="youtube")
|
| 477 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 478 |
response = (
|
|
@@ -534,7 +534,7 @@ async def listChatbotSources(vectorstore: str):
|
|
| 534 |
async def getDataSource(vectorstore: str, sourceUrl: str):
|
| 535 |
trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
|
| 536 |
r = requests.get(sourceUrl)
|
| 537 |
-
return encodeToBase64(eval(r.content.decode("utf-8")))
|
| 538 |
|
| 539 |
|
| 540 |
@app.post("/deleteChatbotSource")
|
|
@@ -624,7 +624,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
| 624 |
for source, fileType in zip(UrlSources, fileTypes):
|
| 625 |
if ((fileType == "/loadPDF") | (fileType == "/loadImagePDF")):
|
| 626 |
r = requests.get(source)
|
| 627 |
-
file = eval(r.content.decode("utf-8"))
|
| 628 |
content = file["output"]
|
| 629 |
fileSource = file["source"]
|
| 630 |
texts.append(".".join(
|
|
@@ -633,14 +633,14 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
| 633 |
sources.append(fileSource)
|
| 634 |
elif fileType == "/loadText":
|
| 635 |
r = requests.get(source)
|
| 636 |
-
file = eval(r.content.decode("utf-8"))
|
| 637 |
content = file["output"]
|
| 638 |
fileSource = file["source"]
|
| 639 |
texts.append(content.replace("\n", " "))
|
| 640 |
sources.append(fileSource)
|
| 641 |
elif ((fileType == "/loadWebURLs") | (fileType == "/loadYoutubeTranscript")):
|
| 642 |
r = requests.get(source)
|
| 643 |
-
file = eval(r.content.decode("utf-8"))
|
| 644 |
content = file["output"]
|
| 645 |
fileSource = file["source"]
|
| 646 |
texts.append(".".join(
|
|
|
|
| 294 |
"output": text,
|
| 295 |
"source": source
|
| 296 |
}
|
| 297 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
| 298 |
fileName = createDataSourceName(sourceName=source)
|
| 299 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 300 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
|
| 329 |
"source": "Text"
|
| 330 |
}
|
| 331 |
numTokens = len(text.translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 332 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
| 333 |
fileName = createDataSourceName(sourceName="Text")
|
| 334 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 335 |
response = (
|
|
|
|
| 389 |
"source": source
|
| 390 |
}
|
| 391 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 392 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
| 393 |
fileName = createDataSourceName(sourceName=source)
|
| 394 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 395 |
response = (
|
|
|
|
| 472 |
"source": "www.youtube.com"
|
| 473 |
}
|
| 474 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 475 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
| 476 |
fileName = createDataSourceName(sourceName="youtube")
|
| 477 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
| 478 |
response = (
|
|
|
|
| 534 |
async def getDataSource(vectorstore: str, sourceUrl: str):
|
| 535 |
trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
|
| 536 |
r = requests.get(sourceUrl)
|
| 537 |
+
return encodeToBase64(eval(r.content.decode("utf-8", errors = "replace")))
|
| 538 |
|
| 539 |
|
| 540 |
@app.post("/deleteChatbotSource")
|
|
|
|
| 624 |
for source, fileType in zip(UrlSources, fileTypes):
|
| 625 |
if ((fileType == "/loadPDF") | (fileType == "/loadImagePDF")):
|
| 626 |
r = requests.get(source)
|
| 627 |
+
file = eval(r.content.decode("utf-8", errors = "replace"))
|
| 628 |
content = file["output"]
|
| 629 |
fileSource = file["source"]
|
| 630 |
texts.append(".".join(
|
|
|
|
| 633 |
sources.append(fileSource)
|
| 634 |
elif fileType == "/loadText":
|
| 635 |
r = requests.get(source)
|
| 636 |
+
file = eval(r.content.decode("utf-8", errors = "replace"))
|
| 637 |
content = file["output"]
|
| 638 |
fileSource = file["source"]
|
| 639 |
texts.append(content.replace("\n", " "))
|
| 640 |
sources.append(fileSource)
|
| 641 |
elif ((fileType == "/loadWebURLs") | (fileType == "/loadYoutubeTranscript")):
|
| 642 |
r = requests.get(source)
|
| 643 |
+
file = eval(r.content.decode("utf-8", errors = "replace"))
|
| 644 |
content = file["output"]
|
| 645 |
fileSource = file["source"]
|
| 646 |
texts.append(".".join(
|
functions.py
CHANGED
|
@@ -348,7 +348,7 @@ def analyzeData(query, dataframe):
|
|
| 348 |
response = df.chat(query)
|
| 349 |
if os.path.isfile(response):
|
| 350 |
with open(response, "rb") as file:
|
| 351 |
-
b64string = base64.b64encode(file.read()).decode("utf-8")
|
| 352 |
return f"data:image/png;base64,{b64string}"
|
| 353 |
else:
|
| 354 |
return response
|
|
@@ -384,7 +384,7 @@ def extractTextFromUrlList(urls):
|
|
| 384 |
def encodeToBase64(dct: dict):
|
| 385 |
for key in dct:
|
| 386 |
if type(dct[key]) == str:
|
| 387 |
-
dct[key] = base64.b64encode(dct[key].encode("utf-8")).decode("utf-8", errors = "replace")
|
| 388 |
elif type(dct[key]) == dict:
|
| 389 |
dct[key] = encodeToBase64(dct[key])
|
| 390 |
return dct
|
|
@@ -392,10 +392,10 @@ def encodeToBase64(dct: dict):
|
|
| 392 |
|
| 393 |
def decodeBase64(dct: dict):
|
| 394 |
if type(dct["output"]) == str:
|
| 395 |
-
dct["output"] = base64.b64decode(dct["output"].encode("utf-8")).decode("utf-8", errors = "replace")
|
| 396 |
else:
|
| 397 |
for key in dct["output"]:
|
| 398 |
-
dct["output"][key] = base64.b64decode(dct["output"][key].encode("utf-8")).decode("utf-8", errors = "replace")
|
| 399 |
return dct
|
| 400 |
|
| 401 |
|
|
|
|
| 348 |
response = df.chat(query)
|
| 349 |
if os.path.isfile(response):
|
| 350 |
with open(response, "rb") as file:
|
| 351 |
+
b64string = base64.b64encode(file.read()).decode("utf-8", errors = "replace")
|
| 352 |
return f"data:image/png;base64,{b64string}"
|
| 353 |
else:
|
| 354 |
return response
|
|
|
|
| 384 |
def encodeToBase64(dct: dict):
|
| 385 |
for key in dct:
|
| 386 |
if type(dct[key]) == str:
|
| 387 |
+
dct[key] = base64.b64encode(dct[key].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
|
| 388 |
elif type(dct[key]) == dict:
|
| 389 |
dct[key] = encodeToBase64(dct[key])
|
| 390 |
return dct
|
|
|
|
| 392 |
|
| 393 |
def decodeBase64(dct: dict):
|
| 394 |
if type(dct["output"]) == str:
|
| 395 |
+
dct["output"] = base64.b64decode(dct["output"].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
|
| 396 |
else:
|
| 397 |
for key in dct["output"]:
|
| 398 |
+
dct["output"][key] = base64.b64decode(dct["output"][key].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
|
| 399 |
return dct
|
| 400 |
|
| 401 |
|