Spaces:
Sleeping
Sleeping
UPDATE: trainChatbot
Browse files
- app.py +16 -2
- functions.py +12 -5
app.py
CHANGED
|
@@ -320,7 +320,7 @@ async def loadText(addTextConfig: AddText):
|
|
| 320 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
| 321 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
| 322 |
dct = {
|
| 323 |
-
"output": text,
|
| 324 |
"source": "Text"
|
| 325 |
}
|
| 326 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
|
@@ -544,13 +544,27 @@ async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
|
|
| 544 |
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
|
| 545 |
.execute()
|
| 546 |
)
|
| 547 |
-
|
| 548 |
return {
|
| 549 |
"output": "SUCCESS"
|
| 550 |
}
|
| 551 |
|
| 552 |
|
| 553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
class TrainChatbot(BaseModel):
|
| 556 |
vectorstore: str
|
|
|
|
| 320 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
| 321 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
| 322 |
dct = {
|
| 323 |
+
"output": cleanText(text = text),
|
| 324 |
"source": "Text"
|
| 325 |
}
|
| 326 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
|
|
|
| 544 |
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
|
| 545 |
.execute()
|
| 546 |
)
|
|
|
|
| 547 |
return {
|
| 548 |
"output": "SUCCESS"
|
| 549 |
}
|
| 550 |
|
| 551 |
|
| 552 |
|
| 553 |
+
@app.post("/publicOrPrivate")
async def publicOrPrivate(vectorstore: str, mode: str = "public"):
    """Set the "public/private" value of a chatbot row.

    Args:
        vectorstore: "$"-separated identifier; field 1 is used as the
            ``user_id`` filter and field 2 as the ``chatbotname`` filter.
        mode: Value written to the "public/private" column. Must be
            "public" or "private".

    Returns:
        ``{"output": "SUCCESS"}`` once the update call has returned.

    Raises:
        HTTPException: 400 when ``vectorstore`` has fewer than three
            "$"-separated fields or ``mode`` is not an accepted value.
    """
    # Function-scope import so the block does not depend on which fastapi
    # names the rest of the module happens to import.
    from fastapi import HTTPException

    # Split once; a malformed identifier used to surface as an unhandled
    # IndexError (HTTP 500) instead of a client error.
    parts = vectorstore.split("$")
    if len(parts) < 3:
        raise HTTPException(
            status_code=400,
            detail="vectorstore must contain at least three '$'-separated fields",
        )
    username, chatbotName = parts[1], parts[2]

    # Reject arbitrary strings so only the two documented states are stored.
    if mode not in ("public", "private"):
        raise HTTPException(
            status_code=400,
            detail="mode must be 'public' or 'private'",
        )

    # NOTE(review): the result is not inspected, so an update that matches
    # no rows still reports SUCCESS — confirm whether that is intended.
    (
        supabase.table("ConversAI_ChatbotInfo")
        .update({"public/private": mode})
        .eq("user_id", username)
        .eq("chatbotname", chatbotName)
        .execute()
    )
    return {"output": "SUCCESS"}
|
| 566 |
+
|
| 567 |
+
|
| 568 |
|
| 569 |
class TrainChatbot(BaseModel):
|
| 570 |
vectorstore: str
|
functions.py
CHANGED
|
@@ -7,6 +7,7 @@ from langchain_qdrant import QdrantVectorStore
|
|
| 7 |
from langchain_qdrant import RetrievalMode
|
| 8 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
| 9 |
from uuid import uuid4
|
|
|
|
| 10 |
from langchain_core.output_parsers import StrOutputParser
|
| 11 |
from langchain.retrievers import ParentDocumentRetriever
|
| 12 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
|
@@ -120,6 +121,10 @@ def createTable(tablename: str):
|
|
| 120 |
"output": "SUCCESS"
|
| 121 |
}
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
def addDocuments(texts: list[tuple[str]], vectorstore: str):
|
| 125 |
global vectorEmbeddings
|
|
@@ -288,7 +293,8 @@ def getLinks(url: str, timeout=30):
|
|
| 288 |
def getTextFromImagePDF(pdfBytes):
|
| 289 |
def getText(image):
|
| 290 |
global reader
|
| 291 |
-
|
|
|
|
| 292 |
|
| 293 |
allImages = convert_from_bytes(pdfBytes)
|
| 294 |
texts = [base64.b64encode(getText(image).encode("utf-8")).decode("utf-8") for image in allImages]
|
|
@@ -303,7 +309,7 @@ def getTranscript(urls: str):
|
|
| 303 |
url, add_video_info=False
|
| 304 |
)
|
| 305 |
doc = " ".join([x.page_content for x in loader.load()])
|
| 306 |
-
texts.append(doc)
|
| 307 |
except:
|
| 308 |
doc = ""
|
| 309 |
texts.append(doc)
|
|
@@ -325,7 +331,7 @@ def analyzeData(query, dataframe):
|
|
| 325 |
|
| 326 |
|
| 327 |
def extractTextFromPage(page):
|
| 328 |
-
text = page.get_text()
|
| 329 |
return base64.b64encode(text.encode("utf-8")).decode("utf-8")
|
| 330 |
|
| 331 |
|
|
@@ -343,7 +349,7 @@ def extractTextFromUrl(url):
|
|
| 343 |
response.raise_for_status()
|
| 344 |
html = response.text
|
| 345 |
soup = BeautifulSoup(html, 'lxml')
|
| 346 |
-
text = soup.get_text(separator=' ', strip=True)
|
| 347 |
return base64.b64encode(text.encode("utf-8")).decode("utf-8")
|
| 348 |
|
| 349 |
|
|
@@ -361,4 +367,5 @@ def createDataSourceName(sourceName):
|
|
| 361 |
i = 1
|
| 362 |
while True:
|
| 363 |
sourceName = sourceName + "-" + str(i)
|
| 364 |
-
return createDataSourceName(sourceName)
|
|
|
|
|
|
| 7 |
from langchain_qdrant import RetrievalMode
|
| 8 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
| 9 |
from uuid import uuid4
|
| 10 |
+
import nltk
|
| 11 |
from langchain_core.output_parsers import StrOutputParser
|
| 12 |
from langchain.retrievers import ParentDocumentRetriever
|
| 13 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
|
|
|
| 121 |
"output": "SUCCESS"
|
| 122 |
}
|
| 123 |
|
| 124 |
+
def cleanText(text: str):
    """Flatten newlines and strip ASCII punctuation, keeping full stops.

    Args:
        text: Raw text to normalise.

    Returns:
        ``text`` with every "\\n" replaced by a single space and every
        character of ``string.punctuation`` except "." removed. Other
        whitespace (including runs of spaces) is left untouched.
    """
    # Imported here so the helper works even if the module's import block
    # does not bring ``string`` into scope.
    import string

    # Keep "." so sentence boundaries survive the clean-up.
    removable = string.punctuation.replace(".", "")
    flattened = text.replace("\n", " ")
    return flattened.translate(str.maketrans("", "", removable))
|
| 128 |
|
| 129 |
def addDocuments(texts: list[tuple[str]], vectorstore: str):
|
| 130 |
global vectorEmbeddings
|
|
|
|
| 293 |
def getTextFromImagePDF(pdfBytes):
|
| 294 |
def getText(image):
|
| 295 |
global reader
|
| 296 |
+
text = "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
|
| 297 |
+
return cleanText(text = text)
|
| 298 |
|
| 299 |
allImages = convert_from_bytes(pdfBytes)
|
| 300 |
texts = [base64.b64encode(getText(image).encode("utf-8")).decode("utf-8") for image in allImages]
|
|
|
|
| 309 |
url, add_video_info=False
|
| 310 |
)
|
| 311 |
doc = " ".join([x.page_content for x in loader.load()])
|
| 312 |
+
texts.append(cleanText(text = doc))
|
| 313 |
except:
|
| 314 |
doc = ""
|
| 315 |
texts.append(doc)
|
|
|
|
| 331 |
|
| 332 |
|
| 333 |
def extractTextFromPage(page):
    """Return the cleaned text of ``page`` as a base64 string.

    The text comes from ``page.get_text()``, is passed through
    ``cleanText``, encoded as UTF-8, base64-encoded, and returned as a
    ``str``.
    """
    cleaned = cleanText(text=page.get_text())
    encodedBytes = base64.b64encode(cleaned.encode("utf-8"))
    return encodedBytes.decode("utf-8")
|
| 336 |
|
| 337 |
|
|
|
|
| 349 |
response.raise_for_status()
|
| 350 |
html = response.text
|
| 351 |
soup = BeautifulSoup(html, 'lxml')
|
| 352 |
+
text = cleanText(text = soup.get_text(separator=' ', strip=True))
|
| 353 |
return base64.b64encode(text.encode("utf-8")).decode("utf-8")
|
| 354 |
|
| 355 |
|
|
|
|
| 367 |
i = 1
|
| 368 |
while True:
|
| 369 |
sourceName = sourceName + "-" + str(i)
|
| 370 |
+
return createDataSourceName(sourceName)
|
| 371 |
+
|