Spaces: Running on T4
Commit · 023005c
Parent(s): 36e2b47
Lazy load heavy LLM libraries for faster startup
src/backend/chatbot.py +29 -17
src/backend/chatbot.py CHANGED
@@ -1,26 +1,35 @@
 import streamlit as st
 import pandas as pd
 import os
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts.chat import (
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    AIMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from llama_index import (
-    SimpleDirectoryReader,
-    VectorStoreIndex,
-    ServiceContext,
-)
-from llama_index.llms import LlamaCPP
-from llama_index.llms.llama_utils import (
-    messages_to_prompt,
-    completion_to_prompt,
-)
 import subprocess
 import time
 
+# Lazy imports - only load when actually needed (saves 5-10 seconds on startup)
+def _lazy_import_llm_libs():
+    """Import heavy LLM libraries only when needed"""
+    global ChatOpenAI, ChatPromptTemplate, SystemMessagePromptTemplate
+    global AIMessagePromptTemplate, HumanMessagePromptTemplate
+    global SimpleDirectoryReader, VectorStoreIndex, ServiceContext
+    global LlamaCPP, messages_to_prompt, completion_to_prompt
+
+    from langchain.chat_models import ChatOpenAI
+    from langchain.prompts.chat import (
+        ChatPromptTemplate,
+        SystemMessagePromptTemplate,
+        AIMessagePromptTemplate,
+        HumanMessagePromptTemplate,
+    )
+    from llama_index import (
+        SimpleDirectoryReader,
+        VectorStoreIndex,
+        ServiceContext,
+    )
+    from llama_index.llms import LlamaCPP
+    from llama_index.llms.llama_utils import (
+        messages_to_prompt,
+        completion_to_prompt,
+    )
+
 # set version
 # st.session_state.demo_lite = False
 
@@ -87,6 +96,9 @@ def init_llm(model, demo_lite):
     if demo_lite == False:
         print("BP 5 : running full demo")
 
+        # Load heavy LLM libraries now (lazy import)
+        _lazy_import_llm_libs()
+
         # Detect GPU and environment
         env_config = detect_gpu_and_environment()
         n_gpu_layers = env_config["n_gpu_layers"]
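
The trick this commit relies on is that a `global` declaration inside a function also applies to `import` statements, so the imported names are bound at module scope the first time `_lazy_import_llm_libs()` runs; later calls are effectively free because Python caches loaded modules in `sys.modules`. Below is a minimal, self-contained sketch of the same pattern, with the stdlib `json` module standing in for the heavy LLM libraries so the example runs anywhere:

import time

def _lazy_import_heavy_libs():
    """Bind a heavy module into module globals on first use."""
    global json
    # Because of the global declaration above, this import binds `json`
    # at module scope rather than as a function local.
    import json

def handle_request():
    _lazy_import_heavy_libs()  # near-instant after the first call (sys.modules cache)
    return json.dumps({"ok": True})

if __name__ == "__main__":
    t0 = time.perf_counter()
    print(handle_request())
    print(f"first call, including the import: {time.perf_counter() - t0:.4f}s")

The one caveat of the pattern is that the deferred names do not exist until the loader has run, so any code path that touches e.g. `ChatOpenAI` or `VectorStoreIndex` before `init_llm()` reaches `_lazy_import_llm_libs()` will raise a NameError; the demo-lite branch presumably avoids this by never touching those names.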