Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  (+10 -3)
@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import List, Annotated, Any
 import operator
 import pandas as pd
+from dotenv import load_dotenv
 from tqdm import tqdm
 from pydantic import BaseModel
 from langchain.embeddings.cohere import CohereEmbeddings
@@ -19,13 +20,19 @@ from langgraph.graph import StateGraph, START, END, add_messages
 from langgraph.constants import Send
 from langgraph.checkpoint.memory import MemorySaver
 
-
 COHERE_API_KEY = os.environ["COHERE_API_KEY"]
 co = cohere.Client(COHERE_API_KEY)
-
+
+base_dir = Path(__file__).resolve().parent.parent
+documents_path = base_dir / "documents"
+
+if not documents_path.exists():
+    raise FileNotFoundError(f"Documents folder not found at {documents_path}")
+
+persist_dir = str(base_dir / "chroma_store")
 
 def prepare_vectorstore():
-    loader = DirectoryLoader(
+    loader = DirectoryLoader(str(documents_path), glob="**/*.txt", loader_cls=TextLoader)
     documents = loader.load()
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
     docs = splitter.split_documents(documents)
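Note: the commit adds the from dotenv import load_dotenv line, but no load_dotenv() call appears in the visible hunks, and os.environ["COHERE_API_KEY"] raises a bare KeyError when the variable is unset. A minimal sketch of the usual pattern, assuming the call belongs near the top of the module; the explicit missing-key check and error message are my additions, not part of the commit:

    import os

    import cohere
    from dotenv import load_dotenv

    # Load variables from a .env file into os.environ before reading them.
    # NOTE: the call site is an assumption -- the visible hunks only add the import.
    load_dotenv()

    # os.environ[...] raises a bare KeyError when the variable is missing;
    # .get() plus an explicit check gives a clearer failure message.
    COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
    if not COHERE_API_KEY:
        raise RuntimeError("COHERE_API_KEY is not set; add it to .env or the Space secrets")

    co = cohere.Client(COHERE_API_KEY)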
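The hunks cut off inside prepare_vectorstore(), so the commit does not show how the function finishes. Given the CohereEmbeddings import and the chroma_store persist directory defined above, the natural continuation is embedding the split chunks into a persisted Chroma store. A sketch of that tail under those assumptions, using the classic LangChain API that matches the diff's import style; the helper name and the Chroma.from_documents call are my guesses at the unseen code, not something the commit shows:

    from langchain.embeddings.cohere import CohereEmbeddings
    from langchain.vectorstores import Chroma

    def finish_vectorstore(docs, persist_dir: str, cohere_api_key: str):
        # Hypothetical helper: embed each chunk with Cohere and persist the
        # index to chroma_store so later runs can reload it instead of
        # re-embedding every document.
        embeddings = CohereEmbeddings(cohere_api_key=cohere_api_key)
        return Chroma.from_documents(docs, embedding=embeddings, persist_directory=persist_dir)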