Spaces:
Sleeping
Sleeping
UPDATE: New Endpoints
Browse files- app.py +62 -8
- functions.py +2 -6
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
import io
|
| 2 |
from functions import *
|
| 3 |
from PyPDF2 import PdfReader
|
|
|
|
| 4 |
from fastapi import FastAPI, File, UploadFile
|
| 5 |
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
| 7 |
|
| 8 |
|
|
|
|
| 9 |
app = FastAPI(title = "ConversAI", root_path = "/api/v1")
|
| 10 |
app.add_middleware(
|
| 11 |
CORSMiddleware,
|
|
@@ -29,23 +31,63 @@ async def login(username: str, password: str):
|
|
| 29 |
|
| 30 |
@app.post("/newChatbot")
|
| 31 |
async def newChatbot(chatbotName: str, username: str):
|
|
|
|
| 32 |
chatbotName = f"convai-{username}-{chatbotName}"
|
| 33 |
return createTable(tablename = chatbotName)
|
| 34 |
|
| 35 |
|
| 36 |
-
@app.post("/
|
| 37 |
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
|
| 38 |
pdf = await pdf.read()
|
| 39 |
reader = PdfReader(io.BytesIO(pdf))
|
| 40 |
text = ""
|
| 41 |
for page in reader.pages:
|
| 42 |
text += page.extract_text()
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
async def addText(vectorstore: str, text: str):
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
@app.post("/answerQuery")
|
| 51 |
async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
|
|
@@ -54,12 +96,24 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
|
|
| 54 |
|
| 55 |
@app.post("/deleteChatbot")
|
| 56 |
async def delete(chatbotName: str):
|
|
|
|
|
|
|
| 57 |
return deleteTable(tableName=chatbotName)
|
| 58 |
|
| 59 |
@app.post("/listChatbots")
|
| 60 |
async def delete(username: str):
|
| 61 |
return listTables(username=username)
|
| 62 |
|
| 63 |
-
@app.post("/
|
| 64 |
async def crawlUrl(baseUrl: str):
|
| 65 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import io
|
| 2 |
from functions import *
|
| 3 |
from PyPDF2 import PdfReader
|
| 4 |
+
import pandas as pd
|
| 5 |
from fastapi import FastAPI, File, UploadFile
|
| 6 |
from fastapi.middleware.cors import CORSMiddleware
|
| 7 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
| 8 |
|
| 9 |
|
| 10 |
+
|
| 11 |
app = FastAPI(title = "ConversAI", root_path = "/api/v1")
|
| 12 |
app.add_middleware(
|
| 13 |
CORSMiddleware,
|
|
|
|
| 31 |
|
| 32 |
@app.post("/newChatbot")
async def newChatbot(chatbotName: str, username: str):
    """Register a new chatbot for *username* and create its vectorstore table.

    Inserts a row into the ConversAI_ChatbotInfo tracking table, then creates
    the backing table named "convai-{username}-{chatbotName}".
    """
    client.table("ConversAI_ChatbotInfo").insert({"username": username, "chatbotname": chatbotName}).execute()
    fullTableName = f"convai-{username}-{chatbotName}"
    return createTable(tablename = fullTableName)
|
| 37 |
|
| 38 |
|
| 39 |
+
@app.post("/addPDF")
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and add it to *vectorstore*,
    enforcing the per-chatbot 400,000-character limit tracked in Supabase.

    Returns the result of addDocuments on success, or an {"output": ...}
    error payload when the limit would be exceeded.
    """
    pdfBytes = await pdf.read()
    reader = PdfReader(io.BytesIO(pdfBytes))
    text = ""
    for page in reader.pages:
        # extract_text() can return None for pages with no extractable text.
        text += page.extract_text() or ""
    # Vectorstore names follow "convai-{username}-{chatbotname}".
    username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
    df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    # .iloc[0] is positional; plain [0] is label-based and raises KeyError
    # unless the matching row happens to carry index label 0.
    currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
    newCount = currentCount + len(text)
    if newCount < 400000:
        # BUG FIX: original passed a set {"charactercount", newCount};
        # supabase .update() requires a column -> value mapping.
        client.table("ConversAI_ChatbotInfo").update({"charactercount": newCount}).eq("username", username).eq("chatbotname", chatbotname).execute()
        return addDocuments(text = text, vectorstore = vectorstore)
    else:
        return {
            "output": "DOCUMENT EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@app.post("/addText")
async def addText(vectorstore: str, text: str):
    """Add raw text to *vectorstore*, enforcing the per-chatbot
    400,000-character limit tracked in Supabase.

    Returns the result of addDocuments on success, or an {"output": ...}
    error payload when the limit would be exceeded.
    """
    # Vectorstore names follow "convai-{username}-{chatbotname}".
    username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
    df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    # .iloc[0] is positional; plain [0] is label-based and raises KeyError
    # unless the matching row happens to carry index label 0.
    currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
    newCount = currentCount + len(text)
    if newCount < 400000:
        # BUG FIX: original passed a set {"charactercount", newCount};
        # supabase .update() requires a column -> value mapping.
        client.table("ConversAI_ChatbotInfo").update({"charactercount": newCount}).eq("username", username).eq("chatbotname", chatbotname).execute()
        return addDocuments(text = text, vectorstore = vectorstore)
    else:
        return {
            # BUG FIX: message said "WEBSITE" — copy-paste from the website
            # endpoint; this endpoint ingests raw text.
            "output": "TEXT EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER TEXT."
        }
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
@app.post("/addWebsite")
async def addWebsite(vectorstore: str, websiteUrl: str):
    """Crawl *websiteUrl*, load every discovered page, and add the combined
    text to *vectorstore*, enforcing the per-chatbot 400,000-character limit.

    Returns the result of addDocuments on success, or an {"output": ...}
    error payload when the limit would be exceeded.
    """
    urls = getLinks(websiteUrl)
    loader = UnstructuredURLLoader(urls=urls)
    docs = loader.load()
    # Iterate documents directly instead of indexing by range(len(docs)).
    text = "\n\n\n\n".join(
        f"Metadata:\n{doc.metadata} \nPage Content:\n {doc.page_content}" for doc in docs
    )
    # Vectorstore names follow "convai-{username}-{chatbotname}".
    username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
    df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    # .iloc[0] is positional; plain [0] is label-based and raises KeyError
    # unless the matching row happens to carry index label 0.
    currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
    newCount = currentCount + len(text)
    if newCount < 400000:
        # BUG FIX: original passed a set {"charactercount", newCount};
        # supabase .update() requires a column -> value mapping.
        client.table("ConversAI_ChatbotInfo").update({"charactercount": newCount}).eq("username", username).eq("chatbotname", chatbotname).execute()
        return addDocuments(text = text, vectorstore = vectorstore)
    else:
        return {
            "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }
|
| 91 |
|
| 92 |
@app.post("/answerQuery")
|
| 93 |
async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
|
|
|
|
| 96 |
|
| 97 |
@app.post("/deleteChatbot")
async def delete(chatbotName: str):
    """Delete a chatbot: remove its tracking row and drop its vectorstore table.

    *chatbotName* is the full table name "convai-{username}-{chatbotname}".
    """
    # BUG FIX: original reassigned `chatbotName` to the short name, so
    # deleteTable received "chatbotname" instead of the full
    # "convai-{username}-{chatbotname}" name that createTable was given.
    # Keep the parts in separate locals and pass the full name through.
    username, shortName = chatbotName.split("-")[1], chatbotName.split("-")[2]
    client.table('ConversAI_ChatbotInfo').delete().eq('username', username).eq('chatbotname', shortName).execute()
    return deleteTable(tableName=chatbotName)
|
| 102 |
|
| 103 |
@app.post("/listChatbots")
async def listChatbots(username: str):
    """List all chatbot tables belonging to *username*.

    BUG FIX: this function was named `delete`, redefining the /deleteChatbot
    handler's module-level name. The route path is unchanged, so HTTP callers
    are unaffected.
    """
    return listTables(username=username)
|
| 106 |
|
| 107 |
+
@app.post("/getLinks")
async def crawlUrl(baseUrl: str):
    """Crawl *baseUrl* and return the discovered links as {"urls": [...]}."""
    discovered = getLinks(url=baseUrl, timeout=30)
    return {"urls": discovered}
|
| 112 |
+
|
| 113 |
+
@app.post("/getCurrentCount")
async def getCount(vectorstore: str):
    """Return the current ingested character count for *vectorstore*.

    *vectorstore* is the full name "convai-{username}-{chatbotname}".
    """
    # BUG FIX: original split `chatbotName`, which is undefined here (the
    # parameter is `vectorstore`) — guaranteed NameError on every call.
    username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
    df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    return {
        # .iloc[0] is positional; plain [0] is label-based and raises KeyError
        # unless the matching row happens to carry index label 0.
        "currentCount": df[(df['username'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
    }
|
functions.py
CHANGED
|
@@ -258,7 +258,7 @@ def listTables(username: str):
|
|
| 258 |
}
|
| 259 |
|
| 260 |
|
| 261 |
-
def
|
| 262 |
start = time.time()
|
| 263 |
def getLinksFromPage(url: str) -> list:
|
| 264 |
response = requests.get(url)
|
|
@@ -286,8 +286,4 @@ def getRawWebText(url: str, timeout = 30):
|
|
| 286 |
break
|
| 287 |
else:
|
| 288 |
uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
|
| 289 |
-
|
| 290 |
-
foundLinks = list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
| 291 |
-
for link in foundLinks:
|
| 292 |
-
allLinks[link] = BeautifulSoup(requests.get(link).text, "lxml").body.get_text(" ", strip = True)
|
| 293 |
-
return allLinks
|
|
|
|
| 258 |
}
|
| 259 |
|
| 260 |
|
| 261 |
+
def getLinks(url: str, timeout = 30):
|
| 262 |
start = time.time()
|
| 263 |
def getLinksFromPage(url: str) -> list:
|
| 264 |
response = requests.get(url)
|
|
|
|
| 286 |
break
|
| 287 |
else:
|
| 288 |
uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
|
| 289 |
+
return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -13,6 +13,7 @@ langchain-groq
|
|
| 13 |
lxml
|
| 14 |
PyPDF2
|
| 15 |
python-dotenv
|
|
|
|
| 16 |
sentence-transformers
|
| 17 |
supabase
|
| 18 |
unstructured
|
|
|
|
| 13 |
lxml
|
| 14 |
PyPDF2
|
| 15 |
python-dotenv
|
| 16 |
+
pandas
|
| 17 |
sentence-transformers
|
| 18 |
supabase
|
| 19 |
unstructured
|