Spaces:
Sleeping
Sleeping
DEBUG: nltk
Browse files
- app.py +6 -1
- functions.py +0 -3
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import io
|
|
| 2 |
import tempfile
|
| 3 |
from ipaddress import ip_address
|
| 4 |
from typing import Optional
|
| 5 |
-
|
| 6 |
import jwt
|
| 7 |
import base64
|
| 8 |
import json
|
|
@@ -21,6 +21,9 @@ from collections import Counter, defaultdict
|
|
| 21 |
from datetime import datetime, timedelta
|
| 22 |
from dateutil.parser import isoparse
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
app = FastAPI(title="ConversAI", root_path="/api/v1")
|
| 25 |
|
| 26 |
app.add_middleware(
|
|
@@ -263,6 +266,7 @@ async def loadPDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
| 263 |
"output": text,
|
| 264 |
"source": source
|
| 265 |
}
|
|
|
|
| 266 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 267 |
fileName = createDataSourceName(sourceName=source)
|
| 268 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
@@ -271,6 +275,7 @@ async def loadPDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
| 271 |
.insert({"username": username,
|
| 272 |
"chatbotName": chatbotName,
|
| 273 |
"dataSourceName": fileName,
|
|
|
|
| 274 |
"sourceEndpoint": "/loadPDF",
|
| 275 |
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
|
| 276 |
.execute()
|
|
|
|
| 2 |
import tempfile
|
| 3 |
from ipaddress import ip_address
|
| 4 |
from typing import Optional
|
| 5 |
+
import nltk
|
| 6 |
import jwt
|
| 7 |
import base64
|
| 8 |
import json
|
|
|
|
| 21 |
from datetime import datetime, timedelta
|
| 22 |
from dateutil.parser import isoparse
|
| 23 |
|
| 24 |
+
|
| 25 |
+
nltk.download("punkt_tab")
|
| 26 |
+
|
| 27 |
app = FastAPI(title="ConversAI", root_path="/api/v1")
|
| 28 |
|
| 29 |
app.add_middleware(
|
|
|
|
| 266 |
"output": text,
|
| 267 |
"source": source
|
| 268 |
}
|
| 269 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
| 270 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
| 271 |
fileName = createDataSourceName(sourceName=source)
|
| 272 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
|
| 275 |
.insert({"username": username,
|
| 276 |
"chatbotName": chatbotName,
|
| 277 |
"dataSourceName": fileName,
|
| 278 |
+
"numTokens": numTokens,
|
| 279 |
"sourceEndpoint": "/loadPDF",
|
| 280 |
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
|
| 281 |
.execute()
|
functions.py
CHANGED
|
@@ -7,7 +7,6 @@ from langchain_qdrant import QdrantVectorStore
|
|
| 7 |
from langchain_qdrant import RetrievalMode
|
| 8 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
| 9 |
from uuid import uuid4
|
| 10 |
-
import nltk
|
| 11 |
from langchain_core.output_parsers import StrOutputParser
|
| 12 |
from langchain.retrievers import ParentDocumentRetriever
|
| 13 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
|
@@ -35,8 +34,6 @@ import base64
|
|
| 35 |
import time
|
| 36 |
import requests
|
| 37 |
|
| 38 |
-
nltk.download('punkt_tab')
|
| 39 |
-
|
| 40 |
|
| 41 |
load_dotenv("secrets.env")
|
| 42 |
client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
|
|
|
|
| 7 |
from langchain_qdrant import RetrievalMode
|
| 8 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
| 9 |
from uuid import uuid4
|
|
|
|
| 10 |
from langchain_core.output_parsers import StrOutputParser
|
| 11 |
from langchain.retrievers import ParentDocumentRetriever
|
| 12 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
|
|
|
| 34 |
import time
|
| 35 |
import requests
|
| 36 |
|
|
|
|
|
|
|
| 37 |
|
| 38 |
load_dotenv("secrets.env")
|
| 39 |
client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
|