bhavinmatariya committed on
Commit
4ea0a84
·
1 Parent(s): fffe8f4

Change path setting

Browse files
Files changed (1) hide show
  1. api/data_pipeline.py +17 -5
api/data_pipeline.py CHANGED
@@ -221,7 +221,8 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
221
  Returns:
222
  bool: True if the file should be processed, False otherwise
223
  """
224
- file_path_parts = os.path.normpath(file_path).split(os.sep)
 
225
  file_name = os.path.basename(file_path)
226
 
227
  if use_inclusion:
@@ -232,7 +233,9 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
232
  if included_dirs:
233
  for included in included_dirs:
234
  clean_included = included.strip("./").rstrip("/")
235
- if clean_included in file_path_parts:
 
 
236
  is_included = True
237
  break
238
 
@@ -261,14 +264,23 @@ def read_all_documents(path: str, is_ollama_embedder: bool = None, excluded_dirs
261
  # Check if file is in an excluded directory
262
  for excluded in excluded_dirs:
263
  clean_excluded = excluded.strip("./").rstrip("/")
264
- if clean_excluded in file_path_parts:
 
 
265
  is_excluded = True
266
  break
267
 
268
- # Check if file matches excluded file patterns
269
  if not is_excluded:
270
  for excluded_file in excluded_files:
271
- if file_name == excluded_file:
 
 
 
 
 
 
 
272
  is_excluded = True
273
  break
274
 
 
221
  Returns:
222
  bool: True if the file should be processed, False otherwise
223
  """
224
+ # Normalize the file path for cross-platform compatibility
225
+ normalized_path = os.path.normpath(file_path).replace(os.sep, '/')
226
  file_name = os.path.basename(file_path)
227
 
228
  if use_inclusion:
 
233
  if included_dirs:
234
  for included in included_dirs:
235
  clean_included = included.strip("./").rstrip("/")
236
+ # Check if the directory appears in the path - handle both .git and git patterns
237
+ path_parts = normalized_path.split('/')
238
+ if clean_included in path_parts or f".{clean_included}" in path_parts:
239
  is_included = True
240
  break
241
 
 
264
  # Check if file is in an excluded directory
265
  for excluded in excluded_dirs:
266
  clean_excluded = excluded.strip("./").rstrip("/")
267
+ # Check if the directory appears in the path - handle both .git and git patterns
268
+ path_parts = normalized_path.split('/')
269
+ if clean_excluded in path_parts or f".{clean_excluded}" in path_parts:
270
  is_excluded = True
271
  break
272
 
273
+ # Check if file matches excluded file patterns
274
  if not is_excluded:
275
  for excluded_file in excluded_files:
276
+ # Handle pattern matching more robustly
277
+ if excluded_file.startswith("*."):
278
+ # Handle wildcard patterns like "*.pyc"
279
+ extension = excluded_file[1:] # Remove the *
280
+ if file_name.endswith(extension):
281
+ is_excluded = True
282
+ break
283
+ elif file_name == excluded_file:
284
  is_excluded = True
285
  break
286