Spaces:
Runtime error
Runtime error
chore: Update langchain_community.vectorstores imports and add Chroma vectorstore
Browse files
- .gitattributes +1 -0
- app.py +40 -11
- langchain_chroma/chroma.sqlite3 +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/data_level0.bin +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/header.bin +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/index_metadata.pickle +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/length.bin +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/link_lists.bin +3 -0
- langchain_faiss/index.faiss +2 -2
- langchain_faiss/index.pkl +2 -2
.gitattributes
CHANGED
|
@@ -34,6 +34,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
langchain_faiss/* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 37 |
*.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
*.faiss filter=lfs diff=lfs merge=lfs -text
|
| 39 |
*.msg filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
langchain_faiss/* filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
langchain_chroma/* filter=lfs diff=lfs merge=lfs -text
|
| 38 |
*.pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
*.faiss filter=lfs diff=lfs merge=lfs -text
|
| 40 |
*.msg filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -16,7 +16,7 @@ from langchain_community.document_loaders.parsers.language.language_parser impor
|
|
| 16 |
LanguageParser,
|
| 17 |
)
|
| 18 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 19 |
-
from langchain_community.vectorstores import FAISS
|
| 20 |
from langchain_core.callbacks.manager import CallbackManager
|
| 21 |
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
| 22 |
from langchain_core.output_parsers import StrOutputParser
|
|
@@ -129,18 +129,37 @@ cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
|
|
| 129 |
|
| 130 |
# Create and save FAISS index
|
| 131 |
FAISS_DB_INDEX = "./langchain_faiss"
|
| 132 |
-
#
|
| 133 |
-
#
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
FAISS_DB_INDEX, cached_embeddings, allow_dangerous_deserialization=True
|
| 136 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
# Create retrievers
|
| 139 |
-
faiss_retriever =
|
|
|
|
| 140 |
bm25_retriever = BM25Retriever.from_documents(combined_documents)
|
| 141 |
bm25_retriever.k = 10
|
| 142 |
ensemble_retriever = EnsembleRetriever(
|
| 143 |
-
retrievers=[bm25_retriever, faiss_retriever
|
|
|
|
| 144 |
)
|
| 145 |
|
| 146 |
# Create prompt template
|
|
@@ -172,11 +191,14 @@ class StreamCallback(BaseCallbackHandler):
|
|
| 172 |
print(token, end="", flush=True)
|
| 173 |
|
| 174 |
|
|
|
|
|
|
|
|
|
|
| 175 |
# Initialize LLMs with configuration
|
| 176 |
llm = ChatOpenAI(
|
| 177 |
model="gpt-4o",
|
| 178 |
temperature=0,
|
| 179 |
-
streaming=
|
| 180 |
callbacks=[StreamCallback()],
|
| 181 |
).configurable_alternatives(
|
| 182 |
ConfigurableField(id="llm"),
|
|
@@ -220,11 +242,11 @@ rag_chain = (
|
|
| 220 |
)
|
| 221 |
|
| 222 |
|
| 223 |
-
model_key = os.getenv("
|
| 224 |
-
print("
|
| 225 |
|
| 226 |
|
| 227 |
-
def
|
| 228 |
message,
|
| 229 |
history: list[tuple[str, str]],
|
| 230 |
):
|
|
@@ -234,11 +256,18 @@ def respond(
|
|
| 234 |
yield response
|
| 235 |
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
"""
|
| 238 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
| 239 |
"""
|
| 240 |
demo = gr.ChatInterface(
|
| 241 |
-
respond,
|
| 242 |
title="랭체인에 대해서 물어보세요!",
|
| 243 |
description="์๋
ํ์ธ์!\n์ ๋ ๋ญ์ฒด์ธ์ ๋ํ ์ธ๊ณต์ง๋ฅ QA๋ด์
๋๋ค. ๋ญ์ฒด์ธ์ ๋ํด ๊น์ ์ง์์ ๊ฐ์ง๊ณ ์์ด์. ๋ญ์ฒด์ธ ๊ฐ๋ฐ์ ๊ดํ ๋์์ด ํ์ํ์๋ฉด ์ธ์ ๋ ์ง ์ง๋ฌธํด์ฃผ์ธ์!",
|
| 244 |
)
|
|
|
|
| 16 |
LanguageParser,
|
| 17 |
)
|
| 18 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 19 |
+
from langchain_community.vectorstores import FAISS, Chroma
|
| 20 |
from langchain_core.callbacks.manager import CallbackManager
|
| 21 |
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
| 22 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
|
| 129 |
|
| 130 |
# Create and save FAISS index
|
| 131 |
FAISS_DB_INDEX = "./langchain_faiss"
|
| 132 |
+
# faiss_db = FAISS.from_documents(
|
| 133 |
+
# documents=combined_documents,
|
| 134 |
+
# embedding=cached_embeddings,
|
| 135 |
+
# )
|
| 136 |
+
# faiss_db.save_local(folder_path=FAISS_DB_INDEX)
|
| 137 |
+
|
| 138 |
+
# Create and save Chroma index
|
| 139 |
+
CHROMA_DB_INDEX = "./langchain_chroma"
|
| 140 |
+
# chroma_db = Chroma.from_documents(
|
| 141 |
+
# documents=combined_documents,
|
| 142 |
+
# embedding=cached_embeddings,
|
| 143 |
+
# persist_directory=CHROMA_DB_INDEX,
|
| 144 |
+
# )
|
| 145 |
+
|
| 146 |
+
# load vectorstore
|
| 147 |
+
faiss_db = FAISS.load_local(
|
| 148 |
FAISS_DB_INDEX, cached_embeddings, allow_dangerous_deserialization=True
|
| 149 |
)
|
| 150 |
+
chroma_db = Chroma(
|
| 151 |
+
embedding_function=cached_embeddings,
|
| 152 |
+
persist_directory=CHROMA_DB_INDEX,
|
| 153 |
+
)
|
| 154 |
|
| 155 |
# Create retrievers
|
| 156 |
+
faiss_retriever = faiss_db.as_retriever(search_type="mmr", search_kwargs={"k": 10})
|
| 157 |
+
chroma_retriever = chroma_db.as_retriever(search_type="mmr", search_kwargs={"k": 10})
|
| 158 |
bm25_retriever = BM25Retriever.from_documents(combined_documents)
|
| 159 |
bm25_retriever.k = 10
|
| 160 |
ensemble_retriever = EnsembleRetriever(
|
| 161 |
+
retrievers=[bm25_retriever, faiss_retriever, chroma_retriever],
|
| 162 |
+
weights=[0.4, 0.3, 0.3],
|
| 163 |
)
|
| 164 |
|
| 165 |
# Create prompt template
|
|
|
|
| 191 |
print(token, end="", flush=True)
|
| 192 |
|
| 193 |
|
| 194 |
+
streaming = os.getenv("STREAMING", "true") == "true"
|
| 195 |
+
print("STREAMING", streaming)
|
| 196 |
+
|
| 197 |
# Initialize LLMs with configuration
|
| 198 |
llm = ChatOpenAI(
|
| 199 |
model="gpt-4o",
|
| 200 |
temperature=0,
|
| 201 |
+
streaming=streaming,
|
| 202 |
callbacks=[StreamCallback()],
|
| 203 |
).configurable_alternatives(
|
| 204 |
ConfigurableField(id="llm"),
|
|
|
|
| 242 |
)
|
| 243 |
|
| 244 |
|
| 245 |
+
model_key = os.getenv("MODEL_KEY", "gemini")
|
| 246 |
+
print("MODEL_KEY", model_key)
|
| 247 |
|
| 248 |
|
| 249 |
+
def respond_stream(
|
| 250 |
message,
|
| 251 |
history: list[tuple[str, str]],
|
| 252 |
):
|
|
|
|
| 256 |
yield response
|
| 257 |
|
| 258 |
|
| 259 |
+
def respond(
|
| 260 |
+
message,
|
| 261 |
+
history: list[tuple[str, str]],
|
| 262 |
+
):
|
| 263 |
+
return rag_chain.with_config(configurable={"llm": model_key}).invoke(message)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
"""
|
| 267 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
| 268 |
"""
|
| 269 |
demo = gr.ChatInterface(
|
| 270 |
+
respond_stream if streaming else respond,
|
| 271 |
title="랭체인에 대해서 물어보세요!",
|
| 272 |
description="์๋
ํ์ธ์!\n์ ๋ ๋ญ์ฒด์ธ์ ๋ํ ์ธ๊ณต์ง๋ฅ QA๋ด์
๋๋ค. ๋ญ์ฒด์ธ์ ๋ํด ๊น์ ์ง์์ ๊ฐ์ง๊ณ ์์ด์. ๋ญ์ฒด์ธ ๊ฐ๋ฐ์ ๊ดํ ๋์์ด ํ์ํ์๋ฉด ์ธ์ ๋ ์ง ์ง๋ฌธํด์ฃผ์ธ์!",
|
| 273 |
)
|
langchain_chroma/chroma.sqlite3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:825e12c771f36b9c3e1f7aa3410a662227600a7c2c361960a0369b29b633871f
|
| 3 |
+
size 189218816
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/data_level0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9f756c81ce3c4911b5c264c5332a0f1dcd2b3bc98dd84dbdb94d01bd9cf927b
|
| 3 |
+
size 63540000
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/header.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fcf6e0965f08173cd9d1d2b68e7d8c9a6971bd865faa9a8cfd8ba994cccc64b
|
| 3 |
+
size 100
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/index_metadata.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff4868df92f9bdadacd1abe27aef8cdb3704a0b16304fdd33ae539ce6c39d371
|
| 3 |
+
size 868175
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/length.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57bba9bb25e471e0266df7225b0e7b12ae290d7237cbf2cf16cb3f9503c4a75d
|
| 3 |
+
size 60000
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/link_lists.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b31f9bfa91a0635928b7b7f2b64977111f7f8fb6207ed42b3d0443518680d5c
|
| 3 |
+
size 132080
|
langchain_faiss/index.faiss
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7794ae040ff5830ba667d78cd4a1648e52e8bd860175f68a74ab85ea56ab3bb2
|
| 3 |
+
size 61632557
|
langchain_faiss/index.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a9c82717db2899e9cdd3196f4806a0807167fe9e7892e3a2c00b5b5c6cb6c9b
|
| 3 |
+
size 19914288
|