DEBUG: updating getLinks
- app.py +5 -4
- functions.py +2 -1
app.py CHANGED
@@ -10,6 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from langchain_community.document_loaders import UnstructuredURLLoader
 from src.api.speech_api import speech_translator_router
 from functions import client as supabase
+from urllib.parse import urlparse
 
 app = FastAPI(title="ConversAI", root_path="/api/v1")
 
@@ -224,11 +225,11 @@ async def addText(addQaPair: AddQAPair):
 
 @app.post("/addWebsite")
 async def addWebsite(vectorstore: str, websiteUrls: list[str]):
-
-    loader = UnstructuredURLLoader(urls=urls)
+    loader = UnstructuredURLLoader(urls=websiteUrls)
     docs = loader.load()
     text = "\n\n".join(
-        [f"
+        [f"{docs[doc].page_content}" for doc in range(len(docs))]
+    )
     username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
     currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
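Note on this hunk: the old code referenced an undefined name `urls` (a NameError at request time); the fix loads `websiteUrls` directly and joins the page content of every loaded document into one string. A minimal sketch of the same flow, with a hypothetical URL (`UnstructuredURLLoader` needs the `unstructured` package installed):

from langchain_community.document_loaders import UnstructuredURLLoader

websiteUrls = ["https://example.com"]  # hypothetical input
loader = UnstructuredURLLoader(urls=websiteUrls)
docs = loader.load()  # one Document per URL
# Same join as the diff; "\n\n".join(doc.page_content for doc in docs)
# would be the more idiomatic equivalent.
text = "\n\n".join([f"{docs[doc].page_content}" for doc in range(len(docs))])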
@@ -238,7 +239,7 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
     if newCount < int(limit):
         client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
             "chatbotname", chatbotname).execute()
-        return addDocuments(text=text, source=
+        return addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
     else:
         return {
             "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
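The new `source` argument labels every page in the batch with the host of the first submitted URL. A quick illustration of what `urlparse(...).netloc` returns (example URL is hypothetical):

from urllib.parse import urlparse

# netloc is the host portion of the URL
print(urlparse("https://docs.example.com/guide/intro").netloc)  # docs.example.com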
functions.py CHANGED
@@ -154,6 +154,7 @@ def addDocuments(text: str, source: str, vectorstore: str):
 def format_docs(docs: str):
     context = ""
     for doc in docs:
+        print("METADATA ::: ", type(doc.metadata))
         context += f"CONTENT: {doc.page_content}\nSOURCE: {doc.metadata} \n\n\n"
     if context == "":
         context = "No context found"
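The added print is a debug probe for the metadata type, in line with the commit title; for LangChain documents it should report a plain dict. A quick check, assuming `langchain_core` is installed:

from langchain_core.documents import Document

doc = Document(page_content="hello", metadata={"source": "example.com"})
# Mirrors the debug line added in the diff
print("METADATA ::: ", type(doc.metadata))  # <class 'dict'>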
@@ -255,7 +256,7 @@ def listTables(username: str):
 
 def getLinks(url: str, timeout=30):
     start = time.time()
-
+
     def getLinksFromPage(url: str) -> list:
         response = requests.get(url)
         soup = BeautifulSoup(response.content, "lxml")
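The hunk cuts off after the first two lines of `getLinksFromPage`; a plausible completion under the usual requests + BeautifulSoup pattern (the href collection and URL absolutization are assumptions, not the repo's actual code):

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def getLinksFromPage(url: str) -> list:
    # Fetch and parse the page with the lxml backend, as in the diff
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "lxml")
    # Assumed continuation: collect absolute URLs from every anchor tag
    return [urljoin(url, a["href"]) for a in soup.find_all("a", href=True)]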