Spaces:
Sleeping
Sleeping
UPDATE: supabase
Browse files
app.py
CHANGED
|
@@ -10,6 +10,8 @@ from src.api.speech_api import speech_translator_router
|
|
| 10 |
from functions import client as supabase
|
| 11 |
from urllib.parse import urlparse
|
| 12 |
import nltk
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
nltk.download('punkt_tab')
|
|
@@ -301,17 +303,29 @@ async def addText(addQaPair: AddQAPair):
|
|
| 301 |
|
| 302 |
@app.post("/addWebsite")
|
| 303 |
async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
|
|
|
| 304 |
text = extractTextFromUrlList(urls = websiteUrls)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
| 306 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 307 |
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
| 308 |
newCount = currentCount + len(text)
|
| 309 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
| 310 |
-
"tokenLimit"]
|
| 311 |
if newCount < int(limit):
|
| 312 |
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
| 313 |
"chatbotname", chatbotname).execute()
|
| 314 |
-
|
|
|
|
|
|
|
| 315 |
else:
|
| 316 |
return {
|
| 317 |
"output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
|
|
|
|
| 10 |
from functions import client as supabase
|
| 11 |
from urllib.parse import urlparse
|
| 12 |
import nltk
|
| 13 |
+
import time
|
| 14 |
+
import uuid
|
| 15 |
|
| 16 |
|
| 17 |
nltk.download('punkt_tab')
|
|
|
|
| 303 |
|
| 304 |
@app.post("/addWebsite")
|
| 305 |
async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
| 306 |
+
start = time.time()
|
| 307 |
text = extractTextFromUrlList(urls = websiteUrls)
|
| 308 |
+
textExtraction = time.time()
|
| 309 |
+
timeTaken = f"TEXT EXTRACTION TIME: {textExtraction - start}s" + "\n"
|
| 310 |
+
links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
|
| 311 |
+
newText = timeTaken + links + "TEXT: \n" + text
|
| 312 |
+
fileId = str(uuid.uuid4())
|
| 313 |
+
with open(f"{fileId}.txt", "w") as file:
|
| 314 |
+
file.write(newText)
|
| 315 |
+
with open(f"{fileId}.txt", "rb") as f:
|
| 316 |
+
supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
|
| 317 |
+
os.remove(f"{fileId}.txt")
|
| 318 |
username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
| 319 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 320 |
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
| 321 |
newCount = currentCount + len(text)
|
| 322 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0]["tokenLimit"]
|
|
|
|
| 323 |
if newCount < int(limit):
|
| 324 |
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
| 325 |
"chatbotname", chatbotname).execute()
|
| 326 |
+
output = addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
|
| 327 |
+
output["supabaseFileName"] = f"{fileId}.txt"
|
| 328 |
+
return output
|
| 329 |
else:
|
| 330 |
return {
|
| 331 |
"output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
|