cmd0160 committed on
Commit
67fb4c0
·
1 Parent(s): 4ca8d97

Adding .csv ingestion functionality

Browse files
Files changed (1) hide show
  1. src/ingest.py +17 -7
src/ingest.py CHANGED
@@ -8,13 +8,23 @@ from langchain_community.embeddings import OpenAIEmbeddings
8
 
9
 
10
  def load_documents(data_dir: str):
11
- loader = DirectoryLoader(
12
- data_dir,
13
- glob="**/*.txt",
14
- loader_cls=TextLoader,
15
- show_progress=True,
16
- )
17
- docs = loader.load()
 
 
 
 
 
 
 
 
 
 
18
  print(f"Loaded {len(docs)} documents from {data_dir}")
19
  print("Documents ingested:")
20
  for d in docs:
 
8
 
9
 
10
  def load_documents(data_dir: str):
11
+ from pathlib import Path
12
+ from langchain_community.document_loaders import CSVLoader, TextLoader
13
+
14
+ docs = []
15
+ for path in Path(data_dir).rglob("*"):
16
+ if not path.is_file():
17
+ continue
18
+ suffix = path.suffix.lower()
19
+ if suffix == ".txt":
20
+ loader = TextLoader(str(path))
21
+ elif suffix == ".csv":
22
+ loader = CSVLoader(file_path=str(path))
23
+ else:
24
+ continue
25
+ loaded = loader.load()
26
+ docs.extend(loaded)
27
+
28
  print(f"Loaded {len(docs)} documents from {data_dir}")
29
  print("Documents ingested:")
30
  for d in docs: