Spaces:
Build error
Build error
add info on the application startup to catch errors
#7
by
RCaz
- opened
app.py
CHANGED
|
@@ -11,7 +11,7 @@ from langchain.chat_models import init_chat_model
|
|
| 11 |
llm = init_chat_model("gpt-5-nano",
|
| 12 |
model_provider="openai",
|
| 13 |
api_key=os.environ['OPENAI_API_KEY'])
|
| 14 |
-
|
| 15 |
|
| 16 |
# load retriever
|
| 17 |
import os
|
|
@@ -33,19 +33,20 @@ def load_from_azure(container_name, local_dir="./index"):
|
|
| 33 |
file.write(container_client.download_blob(blob).readall())
|
| 34 |
|
| 35 |
# Download files from Azure
|
|
|
|
| 36 |
load_from_azure("blobcontaineravatarbot")
|
| 37 |
-
|
| 38 |
# Load into FAISS
|
| 39 |
# from langchain_community.embeddings import HuggingFaceEmbeddings # deprecated
|
| 40 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 41 |
-
|
| 42 |
embedding_model = HuggingFaceEmbeddings(
|
| 43 |
model_name="intfloat/e5-base-v2",
|
| 44 |
# multi_process=True,
|
| 45 |
model_kwargs={"device": "cuda"}, # use cuda for faster embeddings on NVIDIA GPUs
|
| 46 |
encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
|
| 47 |
)
|
| 48 |
-
|
| 49 |
vectorstore = FAISS.load_local("./index", embedding_model, allow_dangerous_deserialization=True)
|
| 50 |
|
| 51 |
# Include a rate limiter
|
|
@@ -77,7 +78,7 @@ class RateLimiter:
|
|
| 77 |
if now - req_time < self.window
|
| 78 |
]
|
| 79 |
return self.max_requests - len(self.requests[identifier])
|
| 80 |
-
|
| 81 |
limiter = RateLimiter(max_requests=10, window_minutes=60)
|
| 82 |
|
| 83 |
# setup chatbot
|
|
@@ -181,10 +182,9 @@ os.environ["LANGSMITH_API_KEY"] = os.environ['LANGSMITH_API_KEY']
|
|
| 181 |
|
| 182 |
# launch gradio app
|
| 183 |
import gradio as gr
|
| 184 |
-
|
| 185 |
iface = gr.ChatInterface(
|
| 186 |
predict,
|
| 187 |
api_name="chat",
|
| 188 |
)
|
| 189 |
-
|
| 190 |
iface.launch(share=True)
|
|
|
|
| 11 |
llm = init_chat_model("gpt-5-nano",
|
| 12 |
model_provider="openai",
|
| 13 |
api_key=os.environ['OPENAI_API_KEY'])
|
| 14 |
+
print("LLM Init.")
|
| 15 |
|
| 16 |
# load retriever
|
| 17 |
import os
|
|
|
|
| 33 |
file.write(container_client.download_blob(blob).readall())
|
| 34 |
|
| 35 |
# Download files from Azure
|
| 36 |
+
print("start download faiss")
|
| 37 |
load_from_azure("blobcontaineravatarbot")
|
| 38 |
+
print("ok.")
|
| 39 |
# Load into FAISS
|
| 40 |
# from langchain_community.embeddings import HuggingFaceEmbeddings # deprecated
|
| 41 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 42 |
+
print("load embeddings")
|
| 43 |
embedding_model = HuggingFaceEmbeddings(
|
| 44 |
model_name="intfloat/e5-base-v2",
|
| 45 |
# multi_process=True,
|
| 46 |
model_kwargs={"device": "cuda"}, # use cuda for faster embeddings on NVIDIA GPUs
|
| 47 |
encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
|
| 48 |
)
|
| 49 |
+
print("load vector store")
|
| 50 |
vectorstore = FAISS.load_local("./index", embedding_model, allow_dangerous_deserialization=True)
|
| 51 |
|
| 52 |
# Include a rate limiter
|
|
|
|
| 78 |
if now - req_time < self.window
|
| 79 |
]
|
| 80 |
return self.max_requests - len(self.requests[identifier])
|
| 81 |
+
print("Rate Limit init.")
|
| 82 |
limiter = RateLimiter(max_requests=10, window_minutes=60)
|
| 83 |
|
| 84 |
# setup chatbot
|
|
|
|
| 182 |
|
| 183 |
# launch gradio app
|
| 184 |
import gradio as gr
|
|
|
|
| 185 |
iface = gr.ChatInterface(
|
| 186 |
predict,
|
| 187 |
api_name="chat",
|
| 188 |
)
|
| 189 |
+
print("Launch ...")
|
| 190 |
iface.launch(share=True)
|