kdevoe committed on
Commit
e13dd34
·
verified ·
1 Parent(s): 4fb848b

Trying cache resource on vectordb build

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -17,12 +17,6 @@ from sklearn.model_selection import train_test_split
17
 
18
  # # Download dataset
19
  file_path = "dataset-tickets-multi-lang-4-20k.csv"
20
- # # Load the latest version
21
- # df = kagglehub.load_dataset(
22
- # KaggleDatasetAdapter.PANDAS,
23
- # "tobiasbueck/multilingual-customer-support-tickets",
24
- # file_path,
25
- # )
26
 
27
  df = pd.read_csv(file_path)
28
 
@@ -47,13 +41,23 @@ documents = loader.load()
47
 
48
  # Get OpenAI setup
49
  openai_api_key = os.getenv("openai_token")
50
- embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
51
 
52
- vectordb = Chroma.from_documents(
53
- documents=documents,
54
- embedding=embedding,
55
- persist_directory=persist_directory
56
- )
 
 
 
 
 
 
 
 
 
 
57
 
58
  # @st.cache_resource
59
  # def get_vectordb():
 
17
 
18
  # # Download dataset
19
  file_path = "dataset-tickets-multi-lang-4-20k.csv"
 
 
 
 
 
 
20
 
21
  df = pd.read_csv(file_path)
22
 
 
41
 
42
  # Get OpenAI setup
43
  openai_api_key = os.getenv("openai_token")
44
+ # embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
45
 
46
+ # vectordb = Chroma.from_documents(
47
+ # documents=documents,
48
+ # embedding=embedding,
49
+ # persist_directory=persist_directory
50
+ # )
51
+
52
+ @st.cache_resource
53
+ def get_vectordb():
54
+ embedding = OpenAIEmbeddings(openai_api_key=os.getenv("openai_token"))
55
+ return Chroma.from_documents(
56
+ documents=documents,
57
+ embedding=embedding,
58
+ persist_directory=persist_directory)
59
+
60
+ vectordb = get_vectordb()
61
 
62
  # @st.cache_resource
63
  # def get_vectordb():