Spaces:
Sleeping
Sleeping
| import io | |
| import tempfile | |
| from starlette import status | |
| from functions import * | |
| import pandas as pd | |
| from fastapi import FastAPI, File, UploadFile, HTTPException | |
| from pydantic import BaseModel | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from src.api.speech_api import speech_translator_router | |
| from functions import client as supabase | |
| from urllib.parse import urlparse | |
| import nltk | |
| import time | |
| import uuid | |
| nltk.download('punkt_tab') | |
# FastAPI application setup: all routes are served under the /api/v1 root path.
app = FastAPI(title="ConversAI", root_path="/api/v1")
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is the
# most permissive CORS configuration — confirm this is intended for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Speech translation endpoints are mounted under /speech.
app.include_router(speech_translator_router, prefix="/speech")
async def sign_up(email, username, password):
    """Register a new Supabase auth user and create the matching app user row.

    Returns a success payload; Supabase sends the verification e-mail.
    """
    # NOTE(review): the original unpacked the auth response as a tuple
    # (`res, _ = ...` then `res[1].id`), relying on pydantic model iteration
    # yielding ("user", User) pairs. Access the `user` field directly instead.
    # The extra "role" key in the credentials dict is presumably ignored by
    # gotrue — confirm it is intentional.
    res = supabase.auth.sign_up(
        {"email": email, "password": password, "role": "user"}
    )
    user_id = res.user.id
    r_ = createUser(user_id=user_id, username=username)
    print(r_)
    return {
        "status": "success",
        "code": 200,
        # Fixed typo: "check you email" -> "check your email".
        "message": "Please check your email address for email verification",
    }
async def check_session():
    """Return the current Supabase auth session (None when not signed in)."""
    return supabase.auth.get_session()
async def get_user(access_token):
    """Fetch the Supabase user associated with the given JWT access token."""
    user = supabase.auth.get_user(jwt=access_token)
    return user
async def refresh_token(refresh_token):
    """Exchange a refresh token for a fresh session via Supabase auth."""
    session = supabase.auth.refresh_token(refresh_token)
    return session
async def sign_in(email, password):
    """Sign a user in with e-mail/password and record the session in Stores.

    Returns a payload with the username and token pair on success.

    Raises:
        HTTPException: 400 when the user is already signed in, the user-info
            row is missing, or the Stores check otherwise fails; 500 on any
            unexpected error.
    """
    try:
        res = supabase.auth.sign_in_with_password(
            {"email": email, "password": password}
        )
        user_id = res.user.id
        access_token = res.session.access_token
        refresh_token = res.session.refresh_token

        store_session_check = supabase.table("Stores").select("*").filter(
            "StoreID", "eq", user_id).execute()
        store_id = None
        if store_session_check and store_session_check.data:
            store_id = store_session_check.data[0].get("StoreID")

        userData = supabase.table("ConversAI_UserInfo").select("*").filter(
            "user_id", "eq", user_id).execute().data
        if not userData:
            # Robustness: previously `userData[0]` raised IndexError here,
            # which surfaced as an opaque 500.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Failed to sign in. Please check your credentials."
            )
        username = userData[0]["username"]

        if not store_id:
            # First sign-in for this user: persist the token pair.
            # (The insert result was previously bound to an unused local.)
            supabase.table("Stores").insert(
                {
                    "AccessToken": access_token,
                    "StoreID": user_id,
                    "RefreshToken": refresh_token,
                }
            ).execute()
            return {
                "message": "Success",
                "code": status.HTTP_200_OK,
                "username": username,
                "user_id": user_id,
                "access_token": access_token,
                "refresh_token": refresh_token,
            }
        elif store_id == user_id:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="You are already signed in. Please sign out first to sign in again."
            )
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Failed to sign in. Please check your credentials."
            )
    except HTTPException as http_exc:
        raise http_exc
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"An unexpected error occurred during sign-in: {str(e)}"
        )
async def login_with_token(token):
    """Sign a user in with an ID token (OAuth/OIDC) and record the session
    in the Stores table.

    Returns a payload with the token pair on success.

    Raises:
        HTTPException: 400 when already signed in or the Stores check fails;
            500 on any unexpected error.
    """
    try:
        res = supabase.auth.sign_in_with_id_token(token)
        # Removed debug `print(res)`: it wrote access/refresh tokens to logs.
        user_id = res.user.id
        access_token = res.session.access_token
        refresh_token = res.session.refresh_token

        store_session_check = supabase.table("Stores").select("*").filter(
            "StoreID", "eq", user_id).execute()
        store_id = None
        if store_session_check and store_session_check.data:
            store_id = store_session_check.data[0].get("StoreID")

        if not store_id:
            # First sign-in for this user: persist the token pair.
            # (The insert result was previously bound to an unused local.)
            supabase.table("Stores").insert(
                {
                    "AccessToken": access_token,
                    "StoreID": user_id,
                    "RefreshToken": refresh_token,
                }
            ).execute()
            return {
                "message": "Success",
                "code": status.HTTP_200_OK,
                "user_id": user_id,
                "access_token": access_token,
                "refresh_token": refresh_token
            }
        elif store_id == user_id:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="You are already signed in. Please sign out first to sign in again."
            )
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Failed to sign in. Please check your credentials."
            )
    except HTTPException as http_exc:
        raise http_exc
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"An unexpected error occurred during sign-in: {str(e)}"
        )
async def set_session_data(access_token, refresh_token):
    """Install the given token pair as the active Supabase session."""
    session = supabase.auth.set_session(access_token, refresh_token)
    return session
async def sign_out(user_id):
    """Remove the user's Stores row and end the Supabase auth session.

    Raises:
        HTTPException: 400 wrapping any underlying error.
    """
    try:
        supabase.table("Stores").delete().eq(
            "StoreID", user_id
        ).execute()
        # Result was previously bound to an unused local (`res`).
        supabase.auth.sign_out()
        return {"message": "success"}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
async def oauth(provider):
    """Start an OAuth sign-in flow with the named provider (e.g. "google")."""
    return supabase.auth.sign_in_with_oauth({"provider": provider})
async def newChatbot(chatbotName: str, username: str):
    """Create a new chatbot for *username*, enforcing the per-user bot limit."""
    existingBots = listTables(username=username)["output"]
    configRows = supabase.table("ConversAI_UserConfig").select("chatbotLimit").eq(
        "user_id", username).execute().data
    limit = configRows[0]["chatbotLimit"]
    if len(existingBots) >= int(limit):
        return {
            "output": "CHATBOT LIMIT EXCEEDED"
        }
    supabase.table("ConversAI_ChatbotInfo").insert(
        {"user_id": username, "chatbotname": chatbotName}).execute()
    # Vectorstore/table names are namespaced as convai$<user>$<bot>.
    return createTable(tablename=f"convai${username}${chatbotName}")
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
    """Extract text from an uploaded PDF, enforce the user's token limit, add
    the text to the chatbot's vectorstore, and archive a timing/text report
    to Supabase storage.

    Returns the addDocuments() output augmented with "supabaseFileName", or a
    limit-exceeded payload.
    """
    source = pdf.filename
    pdfBytes = await pdf.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
        temp_file.write(pdfBytes)
        temp_file_path = temp_file.name
    start = time.time()
    try:
        text = extractTextFromPdf(temp_file_path)
    finally:
        # Previously the temp file leaked when extraction raised.
        os.remove(temp_file_path)
    textExtraction = time.time()
    username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
    df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
    currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
    limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq(
        "user_id", username).execute().data[0]["tokenLimit"]
    newCount = currentCount + len(text)
    if newCount >= int(limit):
        return {
            "output": "DOCUMENT EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }
    supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq(
        "user_id", username).eq("chatbotname", chatbotname).execute()
    uploadStart = time.time()
    output = addDocuments(text=text, source=source, vectorstore=vectorstore)
    uploadEnd = time.time()
    uploadTime = f"VECTOR UPLOAD TIME: {uploadEnd - uploadStart}s" + "\n"
    timeTaken = f"TEXT EXTRACTION TIME: {textExtraction - start}s" + "\n"
    tokenCount = f"TOKEN COUNT: {len(text)}" + "\n"
    tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
    wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
    newText = ("=" * 75 + "\n").join([timeTaken, uploadTime, wordCount, tokenCount, "TEXT: \n" + text + "\n"])
    fileId = str(uuid.uuid4())
    reportName = f"{fileId}.txt"
    try:
        with open(reportName, "w") as file:
            file.write(newText)
        with open(reportName, "rb") as f:
            supabase.storage.from_("ConversAI").upload(
                file=f, path=os.path.join("/", f.name),
                file_options={"content-type": "text/plain"})
    finally:
        # Previously the local report file leaked when the upload raised.
        if os.path.exists(reportName):
            os.remove(reportName)
    output["supabaseFileName"] = reportName
    return output
async def returnText(pdf: UploadFile = File(...)):
    """Extract text from an uploaded (possibly image-based) PDF and time it."""
    filename = pdf.filename
    contents = await pdf.read()
    started = time.time()
    extracted = getTextFromImagePDF(pdfBytes=contents)
    elapsed = f"{time.time() - started}s"
    return {
        "source": filename,
        "extractionTime": elapsed,
        "output": extracted
    }
async def addText(vectorstore: str, text: str, source: str | None = None):
    """Add raw text to a chatbot's vectorstore, enforcing the user's token
    limit, and archive a report file to Supabase storage.

    Returns the addDocuments() output augmented with "supabaseFileName", or a
    limit-exceeded payload.
    """
    username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
    df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
    currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
    newCount = currentCount + len(text)
    limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq(
        "user_id", username).execute().data[0]["tokenLimit"]
    if newCount >= int(limit):
        # NOTE(review): message says "WEBSITE" although this endpoint ingests
        # raw text — kept byte-identical in case clients match on it.
        return {
            "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }
    supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq(
        "user_id", username).eq("chatbotname", chatbotname).execute()
    uploadStart = time.time()
    output = addDocuments(text=text, source=source, vectorstore=vectorstore)
    uploadEnd = time.time()
    uploadTime = f"VECTOR UPLOAD TIME: {uploadEnd - uploadStart}s" + "\n"
    tokenCount = f"TOKEN COUNT: {len(text)}" + "\n"
    tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
    wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
    newText = ("=" * 75 + "\n").join([uploadTime, wordCount, tokenCount, "TEXT: \n" + text + "\n"])
    fileId = str(uuid.uuid4())
    reportName = f"{fileId}.txt"
    try:
        with open(reportName, "w") as file:
            file.write(newText)
        with open(reportName, "rb") as f:
            supabase.storage.from_("ConversAI").upload(
                file=f, path=os.path.join("/", f.name),
                file_options={"content-type": "text/plain"})
    finally:
        # Previously the local report file leaked when the upload raised.
        if os.path.exists(reportName):
            os.remove(reportName)
    output["supabaseFileName"] = reportName
    return output
class AddQAPair(BaseModel):
    """Request body for adding a question/answer pair to a chatbot."""
    # Namespaced vectorstore id, "convai$<user>$<bot>".
    vectorstore: str
    question: str
    answer: str
async def addQAPairData(addQaPair: AddQAPair):
    """Add a Q&A pair to the chatbot's vectorstore, enforcing the token limit."""
    parts = addQaPair.vectorstore.split("$")
    username, chatbotname = parts[1], parts[2]
    df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
    mask = (df["user_id"] == username) & (df["chatbotname"] == chatbotname)
    currentCount = df[mask]["charactercount"].iloc[0]
    qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
    newCount = currentCount + len(qa)
    limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq(
        "user_id", username).execute().data[0]["tokenLimit"]
    if newCount >= int(limit):
        return {
            "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }
    supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq(
        "user_id", username).eq("chatbotname", chatbotname).execute()
    return addDocuments(text=qa, source="Q&A Pairs", vectorstore=addQaPair.vectorstore)
async def addWebsite(vectorstore: str, websiteUrls: list[str]):
    """Scrape the given URLs, enforce the user's token limit, add the text to
    the chatbot's vectorstore, and archive a report file to Supabase storage.

    Returns the addDocuments() output augmented with "supabaseFileName", or a
    limit-exceeded payload.
    """
    start = time.time()
    text = extractTextFromUrlList(urls=websiteUrls)
    textExtraction = time.time()
    username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
    df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
    currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
    newCount = currentCount + len(text)
    limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq(
        "user_id", username).execute().data[0]["tokenLimit"]
    if newCount >= int(limit):
        return {
            "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }
    supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq(
        "user_id", username).eq("chatbotname", chatbotname).execute()
    uploadStart = time.time()
    # The first URL's host is recorded as the document source.
    output = addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
    uploadEnd = time.time()
    uploadTime = f"VECTOR UPLOAD TIME: {uploadEnd - uploadStart}s" + "\n"
    timeTaken = f"TEXT EXTRACTION TIME: {textExtraction - start}s" + "\n"
    tokenCount = f"TOKEN COUNT: {len(text)}" + "\n"
    tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
    wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
    links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
    newText = ("=" * 75 + "\n").join([timeTaken, uploadTime, wordCount, tokenCount, links, "TEXT: \n" + text + "\n"])
    fileId = str(uuid.uuid4())
    reportName = f"{fileId}.txt"
    try:
        with open(reportName, "w") as file:
            file.write(newText)
        with open(reportName, "rb") as f:
            supabase.storage.from_("ConversAI").upload(
                file=f, path=os.path.join("/", f.name),
                file_options={"content-type": "text/plain"})
    finally:
        # Previously the local report file leaked when the upload raised.
        if os.path.exists(reportName):
            os.remove(reportName)
    output["supabaseFileName"] = reportName
    return output
async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
    """Answer *query* against the chatbot's vectorstore and log the exchange."""
    parts = vectorstore.split("$")
    username, chatbotName = parts[1], parts[2]
    output = answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
    # Persist the Q/A exchange for the chat-history endpoint.
    supabase.table("ConversAI_ChatHistory").insert({
        "username": username,
        "chatbotName": chatbotName,
        "llmModel": llmModel,
        "question": query,
        "response": output["output"],
    }).execute()
    return output
async def delete(chatbotName: str):
    # Delete a chatbot: remove its ConversAI_ChatbotInfo row and drop its table.
    # `chatbotName` is the namespaced vectorstore id "convai$<user>$<bot>".
    # NOTE(review): this function is shadowed by a later `async def delete(username)`
    # in this file, making it unreachable by name at module level — one of the
    # two should be renamed.
    username, chatbotName = chatbotName.split("$")[1], chatbotName.split("$")[2]
    supabase.table('ConversAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname', chatbotName).execute()
    return deleteTable(tableName=chatbotName)
async def delete(username: str):
    # NOTE(review): despite its name, this LISTS a user's chatbot tables; it
    # also redefines (shadows) the earlier `delete(chatbotName)` above. It
    # should be renamed (e.g. listChatbots) once callers are updated.
    return listTables(username=username)
async def crawlUrl(baseUrl: str):
    """Crawl *baseUrl* and return the discovered links (30 s timeout)."""
    links = getLinks(url=baseUrl, timeout=30)
    return {"urls": links}
async def getCount(vectorstore: str):
    """Return the stored character count for the given chatbot.

    `vectorstore` is the namespaced id "convai$<user>$<bot>".
    """
    username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
    # Filter server-side instead of downloading the whole table into pandas.
    rows = (
        supabase.table("ConversAI_ChatbotInfo")
        .select("charactercount")
        .eq("user_id", username)
        .eq("chatbotname", chatbotName)
        .execute()
        .data
    )
    return {
        "currentCount": rows[0]["charactercount"]
    }
async def getYTTranscript(urls: str):
    """Return the transcript(s) for the given YouTube URL(s)."""
    transcript = getTranscript(urls=urls)
    return {"transcript": transcript}
async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
    """Answer *query* against an uploaded spreadsheet (Excel or CSV).

    Non-spreadsheet uploads get "INVALID FILE TYPE"; any analysis failure is
    reported as "UNABLE TO ANSWER QUERY" rather than a 500.
    """
    # File type is inferred from the filename extension only.
    extension = file.filename.split(".")[-1]
    try:
        if extension in ["xls", "xlsx", "xlsm", "xlsb"]:
            df = pd.read_excel(io.BytesIO(await file.read()))
            response = analyzeData(query=query, dataframe=df)
        elif extension == "csv":
            df = pd.read_csv(io.BytesIO(await file.read()))
            response = analyzeData(query=query, dataframe=df)
        else:
            response = "INVALID FILE TYPE"
        return {
            "output": response
        }
    except Exception:
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt and SystemExit.
        return {
            "output": "UNABLE TO ANSWER QUERY"
        }
async def chatHistory(vectorstore: str):
    """Return timestamped question/response history for the given chatbot."""
    parts = vectorstore.split("$")
    username, chatbotName = parts[1], parts[2]
    rows = (
        supabase.table("ConversAI_ChatHistory")
        .select("timestamp", "question", "response")
        .eq("username", username)
        .eq("chatbotName", chatbotName)
        .execute()
        .data
    )
    return rows