Spaces:

techconspartners
/

ConversAI

Sleeping

App Files Community

ishworrsubedii committed on Aug 14, 2024

Commit

3fa5f95

1 Parent(s): ba4a6fd

Changed `username` to `user_id`

Browse files

Files changed (2) hide show

app.py +32 -22
functions.py +63 -55

app.py CHANGED Viewed

@@ -20,15 +20,16 @@ app.add_middleware(
     allow_headers=["*"],
 )
-app.include_router(speech_translator_router, prefix="/speech")
 @app.post("/signup")
-async def sign_up(email, password):
     try:
         res, _ = supabase.auth.sign_up(
             {"email": email, "password": password, "role": "user"}
         )
         response = {
             "status": "success",
             "code": 200,
@@ -56,6 +57,8 @@ async def sign_in(email, password):
         user_id = res.user.id
         access_token = res.session.access_token
         refresh_token = res.session.refresh_token
         store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
         try:
             store_id = store_session_check[1][0]["StoreID"]
@@ -113,10 +116,17 @@ async def set_session_data(access_token, refresh_token):
 @app.post("/logout")
-async def sign_out():
-    res = supabase.auth.sign_out()
-    return res
 @app.post("/oauth")
@@ -129,13 +139,13 @@ async def oauth(provider):
 @app.post("/newChatbot")
 async def newChatbot(chatbotName: str, username: str):
     currentBotCount = len(listTables(username=username)["output"])
-    limit = client.table("ConversAI_UserConfig").select("chatbotLimit").eq("username", username).execute().data[0][
         "chatbotLimit"]
     if currentBotCount >= int(limit):
         return {
             "output": "CHATBOT LIMIT EXCEEDED"
         }
-    client.table("ConversAI_ChatbotInfo").insert({"username": username, "chatbotname": chatbotName}).execute()
     chatbotName = f"convai-{username}-{chatbotName}"
     return createTable(tablename=chatbotName)
@@ -149,12 +159,12 @@ async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
         text += page.extract_text()
     username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
-    currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
-    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
         "tokenLimit"]
     newCount = currentCount + len(text)
     if newCount < int(limit):
-        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=text, vectorstore=vectorstore)
     else:
@@ -174,12 +184,12 @@ async def returnText(pdf: UploadFile = File(...)):
 async def addText(vectorstore: str, text: str):
     username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
-    currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
     newCount = currentCount + len(text)
-    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
         "tokenLimit"]
     if newCount < int(limit):
-        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=text, vectorstore=vectorstore)
     else:
@@ -198,13 +208,13 @@ class AddQAPair(BaseModel):
 async def addText(addQaPair: AddQAPair):
     username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
-    currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
     qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
     newCount = currentCount + len(qa)
-    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
         "tokenLimit"]
     if newCount < int(limit):
-        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
     else:
@@ -222,12 +232,12 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
         [f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
     username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
-    currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
     newCount = currentCount + len(text)
-    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
         "tokenLimit"]
     if newCount < int(limit):
-        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=text, vectorstore=vectorstore)
     else:
@@ -244,7 +254,7 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
 @app.post("/deleteChatbot")
 async def delete(chatbotName: str):
     username, chatbotName = chatbotName.split("-")[1], chatbotName.split("-")[2]
-    client.table('ConversAI_ChatbotInfo').delete().eq('username', username).eq('chatbotname', chatbotName).execute()
     return deleteTable(tableName=chatbotName)
@@ -265,7 +275,7 @@ async def getCount(vectorstore: str):
     username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
     return {
-        "currentCount": df[(df['username'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
     }
@@ -294,4 +304,4 @@ async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
     except:
         return {
             "output": "UNABLE TO ANSWER QUERY"
-        }

     allow_headers=["*"],
 )
+# app.include_router(speech_translator_router, prefix="/speech")
 @app.post("/signup")
+async def sign_up(email, username, password):
     try:
         res, _ = supabase.auth.sign_up(
             {"email": email, "password": password, "role": "user"}
         )
+        createUser(username=username)
         response = {
             "status": "success",
             "code": 200,
         user_id = res.user.id
         access_token = res.session.access_token
         refresh_token = res.session.refresh_token
+        createUser(username=user_id)
         store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
         try:
             store_id = store_session_check[1][0]["StoreID"]
 @app.post("/logout")
+async def sign_out(store_id):
+    try:
+        supabase.table("Stores").delete().eq(
+            "StoreID", store_id
+        ).execute()
+        res = supabase.auth.sign_out()
+        response = {"message": "success"}
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
 @app.post("/oauth")
 @app.post("/newChatbot")
 async def newChatbot(chatbotName: str, username: str):
     currentBotCount = len(listTables(username=username)["output"])
+    limit = client.table("ConversAI_UserConfig").select("chatbotLimit").eq("user_id", username).execute().data[0][
         "chatbotLimit"]
     if currentBotCount >= int(limit):
         return {
             "output": "CHATBOT LIMIT EXCEEDED"
         }
+    client.table("ConversAI_ChatbotInfo").insert({"user_id": username, "chatbotname": chatbotName}).execute()
     chatbotName = f"convai-{username}-{chatbotName}"
     return createTable(tablename=chatbotName)
         text += page.extract_text()
     username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
+    currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
+    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
         "tokenLimit"]
     newCount = currentCount + len(text)
     if newCount < int(limit):
+        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=text, vectorstore=vectorstore)
     else:
 async def addText(vectorstore: str, text: str):
     username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
+    currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
     newCount = currentCount + len(text)
+    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
         "tokenLimit"]
     if newCount < int(limit):
+        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=text, vectorstore=vectorstore)
     else:
 async def addText(addQaPair: AddQAPair):
     username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
+    currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
     qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
     newCount = currentCount + len(qa)
+    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
         "tokenLimit"]
     if newCount < int(limit):
+        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
     else:
         [f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
     username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
+    currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
     newCount = currentCount + len(text)
+    limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
         "tokenLimit"]
     if newCount < int(limit):
+        client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
             "chatbotname", chatbotname).execute()
         return addDocuments(text=text, vectorstore=vectorstore)
     else:
 @app.post("/deleteChatbot")
 async def delete(chatbotName: str):
     username, chatbotName = chatbotName.split("-")[1], chatbotName.split("-")[2]
+    client.table('ConversAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname', chatbotName).execute()
     return deleteTable(tableName=chatbotName)
     username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
     return {
+        "currentCount": df[(df['user_id'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
     }
     except:
         return {
             "output": "UNABLE TO ANSWER QUERY"
+        }

functions.py CHANGED Viewed

@@ -32,19 +32,18 @@ import base64
 import time
 import requests
 load_dotenv("secrets.env")
 client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
 qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
 model_kwargs = {"device": "cuda"}
 encode_kwargs = {"normalize_embeddings": True}
 vectorEmbeddings = HuggingFaceEmbeddings(
-    model_name = "BAAI/bge-m3",
-    model_kwargs = model_kwargs,
-    encode_kwargs = encode_kwargs
 )
-reader = easyocr.Reader(['en'], gpu = True, model_storage_directory = "/app/EasyOCRModels")
-sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25")
 prompt = """
 INSTRUCTIONS:
 =====================================
@@ -81,46 +80,48 @@ store = InMemoryStore()
 chatHistoryStore = dict()
-def createUser(username: str, password: str) -> None:
     try:
         userData = client.table("ConversAI_UserInfo").select("*").execute().data
-        if username not in [userData[x]["username"] for x in range(len(userData))]:
-            client.table("ConversAI_UserInfo").insert({"username": username, "password": password}).execute()
-            client.table("ConversAI_UserConfig").insert({"username": username}).execute()
             return {
                 "output": "SUCCESS"
             }
-        else:
             return {
                 "output": "USER ALREADY EXISTS"
             }
     except Exception as e:
         return {
             "error": e
-        }
-def matchPassword(username: str, password: str) -> str:
-    response = (
-    client.table("ConversAI_UserInfo")
-    .select("*")
-    .eq("username", username)
-    .execute()
-    )
-    try: return {
-        "output": password == response.data[0]["password"]
-        }
-    except: return {
-        "output": "USER DOESN'T EXIST"
-        }
 def createTable(tablename: str):
     global vectorEmbeddings
     global sparseEmbeddings
     qdrant = QdrantVectorStore.from_documents(
-        documents = [],
-        embedding = vectorEmbeddings,
         sparse_embedding=sparseEmbeddings,
         url=os.environ["QDRANT_URL"],
         prefer_grpc=True,
@@ -132,21 +133,22 @@ def createTable(tablename: str):
         "output": "SUCCESS"
     }
 def addDocuments(text: str, vectorstore: str):
     global vectorEmbeddings
     global sparseEmbeddings
     global store
     parentSplitter = RecursiveCharacterTextSplitter(
-        chunk_size = 2100,
-        add_start_index = True
     )
     childSplitter = RecursiveCharacterTextSplitter(
-        chunk_size = 300,
-        add_start_index = True
     )
-    texts = [Document(page_content = text)]
     vectorstore = QdrantVectorStore.from_existing_collection(
-        embedding = vectorEmbeddings,
         sparse_embedding=sparseEmbeddings,
         collection_name=vectorstore,
         url=os.environ["QDRANT_URL"],
@@ -159,7 +161,7 @@ def addDocuments(text: str, vectorstore: str):
         child_splitter=childSplitter,
         parent_splitter=parentSplitter
     )
-    retriever.add_documents(documents = texts)
     return {
         "output": "SUCCESS"
     }
@@ -169,7 +171,8 @@ def format_docs(docs: str):
     context = "\n\n".join(doc.page_content for doc in docs)
     if context == "":
         context = "No context found"
-    else: pass
     return context
@@ -186,19 +189,19 @@ def trimMessages(chain_input):
             pass
         else:
             chatHistoryStore[storeName].clear()
-            for message in messages[-1: ]:
                 chatHistoryStore[storeName].add_message(message)
     return True
 def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
-    global prompt
     global client
     global vectorEmbeddings
     global sparseEmbeddings
     vectorStoreName = vectorstore
     vectorstore = QdrantVectorStore.from_existing_collection(
-        embedding = vectorEmbeddings,
         sparse_embedding=sparseEmbeddings,
         collection_name=vectorstore,
         url=os.environ["QDRANT_URL"],
@@ -216,25 +219,25 @@ def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192")
         base_compressor=compressor, base_retriever=retriever
     )
     baseChain = (
-        {"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs), "question": RunnablePassthrough(), "chatHistory": RunnablePassthrough()}
-        | prompt
-        | ChatGroq(model = llmModel, temperature = 0.75, max_tokens = 512)
-        | StrOutputParser()
-        )
     messageChain = RunnableWithMessageHistory(
         baseChain,
         get_session_history,
-        input_messages_key = "question",
-        history_messages_key = "chatHistory"
     )
-    chain = RunnablePassthrough.assign(messages_trimmed = trimMessages) | messageChain
     return {
         "output": chain.invoke(
             {"question": query},
             {"configurable": {"session_id": vectorStoreName}}
         )
     }
 def deleteTable(tableName: str):
@@ -249,21 +252,24 @@ def deleteTable(tableName: str):
             "error": e
         }
 def listTables(username: str):
     try:
         global qdrantClient
         qdrantCollections = qdrantClient.get_collections()
         return {
-            "output": list(filter(lambda x: True if x.split("-")[1] == username else False, [x.name for x in qdrantCollections.collections]))
         }
     except Exception as e:
         return {
             "error": e
         }
-def getLinks(url: str, timeout = 30):
     start = time.time()
     def getLinksFromPage(url: str) -> list:
         response = requests.get(url)
         soup = BeautifulSoup(response.content, "lxml")
@@ -281,6 +287,7 @@ def getLinks(url: str, timeout = 30):
             else:
                 continue
         return links
     links = getLinksFromPage(url)
     uniqueLinks = set()
     for link in links:
@@ -292,22 +299,23 @@ def getLinks(url: str, timeout = 30):
     return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
 def getTextFromImagePDF(pdfBytes):
     def getText(image):
         global reader
         return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
     allImages = convert_from_bytes(pdfBytes)
     texts = [getText(image) for image in allImages]
     return "\n\n\n".join(texts)
 def getTranscript(urls: str):
     urls = urls.split(",")
     texts = []
     for url in urls:
         try:
             loader = YoutubeLoader.from_youtube_url(
-                url, add_video_info = False
             )
             doc = " ".join([x.page_content for x in loader.load()])
             texts.append(doc)
@@ -318,12 +326,12 @@ def getTranscript(urls: str):
 def analyzeData(query, dataframe):
-    llm = ChatGroq(name = "llama-3.1-8b-instant")
-    df = SmartDataframe(dataframe, config = {"llm": llm, "verbose": False})
     response = df.chat(query)
     if os.path.isfile(response):
         with open(response, "rb") as file:
             b64string = base64.b64encode(file.read()).decode("utf-8")
         return f"data:image/png;base64,{b64string}"
     else:
-        return response

 import time
 import requests
 load_dotenv("secrets.env")
 client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
 qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
 model_kwargs = {"device": "cuda"}
 encode_kwargs = {"normalize_embeddings": True}
 vectorEmbeddings = HuggingFaceEmbeddings(
+    model_name="BAAI/bge-m3",
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs
 )
+reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
+sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25")
 prompt = """
 INSTRUCTIONS:
 =====================================
 chatHistoryStore = dict()
+def createUser(username: str) -> dict:
     try:
         userData = client.table("ConversAI_UserInfo").select("*").execute().data
+        if username not in [userData[x]["user_id"] for x in range(len(userData))]:
+            client.table("ConversAI_UserInfo").insert({"user_id": username}).execute()
+            client.table("ConversAI_UserConfig").insert({"user_id": username}).execute()
             return {
                 "output": "SUCCESS"
             }
+        else:
             return {
                 "output": "USER ALREADY EXISTS"
             }
     except Exception as e:
         return {
             "error": e
+        }
+# def matchPassword(username: str, password: str) -> str:
+#     response = (
+#         client.table("ConversAI_UserInfo")
+#         .select("*")
+#         .eq("username", username)
+#         .execute()
+#     )
+#     try:
+#         return {
+#             "output": password == response.data[0]["password"]
+#         }
+#     except:
+#         return {
+#             "output": "USER DOESN'T EXIST"
+#         }
 def createTable(tablename: str):
     global vectorEmbeddings
     global sparseEmbeddings
     qdrant = QdrantVectorStore.from_documents(
+        documents=[],
+        embedding=vectorEmbeddings,
         sparse_embedding=sparseEmbeddings,
         url=os.environ["QDRANT_URL"],
         prefer_grpc=True,
         "output": "SUCCESS"
     }
 def addDocuments(text: str, vectorstore: str):
     global vectorEmbeddings
     global sparseEmbeddings
     global store
     parentSplitter = RecursiveCharacterTextSplitter(
+        chunk_size=2100,
+        add_start_index=True
     )
     childSplitter = RecursiveCharacterTextSplitter(
+        chunk_size=300,
+        add_start_index=True
     )
+    texts = [Document(page_content=text)]
     vectorstore = QdrantVectorStore.from_existing_collection(
+        embedding=vectorEmbeddings,
         sparse_embedding=sparseEmbeddings,
         collection_name=vectorstore,
         url=os.environ["QDRANT_URL"],
         child_splitter=childSplitter,
         parent_splitter=parentSplitter
     )
+    retriever.add_documents(documents=texts)
     return {
         "output": "SUCCESS"
     }
     context = "\n\n".join(doc.page_content for doc in docs)
     if context == "":
         context = "No context found"
+    else:
+        pass
     return context
             pass
         else:
             chatHistoryStore[storeName].clear()
+            for message in messages[-1:]:
                 chatHistoryStore[storeName].add_message(message)
     return True
 def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
+    global prompt
     global client
     global vectorEmbeddings
     global sparseEmbeddings
     vectorStoreName = vectorstore
     vectorstore = QdrantVectorStore.from_existing_collection(
+        embedding=vectorEmbeddings,
         sparse_embedding=sparseEmbeddings,
         collection_name=vectorstore,
         url=os.environ["QDRANT_URL"],
         base_compressor=compressor, base_retriever=retriever
     )
     baseChain = (
+            {"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs),
+             "question": RunnablePassthrough(), "chatHistory": RunnablePassthrough()}
+            | prompt
+            | ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
+            | StrOutputParser()
+    )
     messageChain = RunnableWithMessageHistory(
         baseChain,
         get_session_history,
+        input_messages_key="question",
+        history_messages_key="chatHistory"
     )
+    chain = RunnablePassthrough.assign(messages_trimmed=trimMessages) | messageChain
     return {
         "output": chain.invoke(
             {"question": query},
             {"configurable": {"session_id": vectorStoreName}}
         )
     }
 def deleteTable(tableName: str):
             "error": e
         }
 def listTables(username: str):
     try:
         global qdrantClient
         qdrantCollections = qdrantClient.get_collections()
         return {
+            "output": list(filter(lambda x: True if x.split("-")[1] == username else False,
+                                  [x.name for x in qdrantCollections.collections]))
         }
     except Exception as e:
         return {
             "error": e
         }
+def getLinks(url: str, timeout=30):
     start = time.time()
     def getLinksFromPage(url: str) -> list:
         response = requests.get(url)
         soup = BeautifulSoup(response.content, "lxml")
             else:
                 continue
         return links
     links = getLinksFromPage(url)
     uniqueLinks = set()
     for link in links:
     return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
 def getTextFromImagePDF(pdfBytes):
     def getText(image):
         global reader
         return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
     allImages = convert_from_bytes(pdfBytes)
     texts = [getText(image) for image in allImages]
     return "\n\n\n".join(texts)
 def getTranscript(urls: str):
     urls = urls.split(",")
     texts = []
     for url in urls:
         try:
             loader = YoutubeLoader.from_youtube_url(
+                url, add_video_info=False
             )
             doc = " ".join([x.page_content for x in loader.load()])
             texts.append(doc)
 def analyzeData(query, dataframe):
+    llm = ChatGroq(name="llama-3.1-8b-instant")
+    df = SmartDataframe(dataframe, config={"llm": llm, "verbose": False})
     response = df.chat(query)
     if os.path.isfile(response):
         with open(response, "rb") as file:
             b64string = base64.b64encode(file.read()).decode("utf-8")
         return f"data:image/png;base64,{b64string}"
     else:
+        return response